vibetunnel/mac/VibeTunnel/Core/Services/ScreencapService.swift

import AppKit
import CoreGraphics
import CoreImage
@preconcurrency import CoreMedia
import Foundation
import OSLog
@preconcurrency import ScreenCaptureKit
import VideoToolbox

/// Service that provides screen capture functionality with HTTP API
@preconcurrency
@MainActor
public final class ScreencapService: NSObject {
    private let logger = Logger(subsystem: "sh.vibetunnel.vibetunnel", category: "ScreencapService")

    // MARK: - Singleton

    static let shared = ScreencapService()

    // MARK: - WebSocket Connection State

    /// True while a connection attempt started by `setupWebSocketForAPIHandling()` is in flight.
    private var isWebSocketConnecting = false
    /// True after `connectForAPIHandling()` has succeeded; cleared when a disconnect is detected or reported.
    private var isWebSocketConnected = false
    /// Callers suspended while a connection attempt is in flight. `setupWebSocketForAPIHandling()`
    /// resumes all of them (with success or the failure) and empties the list when the attempt ends.
    private var webSocketConnectionContinuations: [CheckedContinuation<Void, Error>] = []
    /// Handle of the periodic connection monitor created by `startConnectionMonitor()`.
    private var reconnectTask: Task<Void, Never>?
    /// Gate checked by the connection monitor and by `scheduleReconnection()` before retrying.
    private var shouldReconnect = true

    // MARK: - Properties

    private var captureStream: SCStream?
    private var captureFilter: SCContentFilter?
    private var isCapturing = false
    private var captureMode: CaptureMode = .desktop(displayIndex: 0)
    private var selectedWindow: SCWindow?
    private var currentDisplayIndex: Int = 0
    private var currentFrame: CGImage?
    private let frameQueue = DispatchQueue(label: "sh.vibetunnel.screencap.frame", qos: .userInitiated)
    private let sampleHandlerQueue = DispatchQueue(label: "sh.vibetunnel.screencap.sampleHandler", qos: .userInitiated)
    private var frameCounter: Int = 0

    /// Icon cache, filled by `getCachedAppIcon(for:)`.
    /// A key that maps to `nil` records that no icon could be produced for that PID.
    private var iconCache: [Int32: String?] = [:] // PID -> base64 icon

    // WebRTC support
    // These properties need to be nonisolated so they can be accessed from the stream output handler
    private nonisolated(unsafe) var webRTCManager: WebRTCManager?
    private nonisolated(unsafe) var useWebRTC = false
    private var decompressionSession: VTDecompressionSession?

    /// State machine for capture lifecycle
    private let stateMachine = CaptureStateMachine()

    // MARK: - Types

    /// Errors reported by the screen capture service.
    ///
    /// Conforms to `LocalizedError`; `errorDescription` supplies the human-readable
    /// message for every case.
    enum ScreencapError: LocalizedError {
        case invalidServerURL
        case webSocketNotConnected
        /// Carries the ID of the window that could not be found.
        case windowNotFound(Int)
        case noDisplay
        case notCapturing
        /// Wraps the underlying error raised while starting capture.
        case failedToStartCapture(Error)
        case failedToCreateEvent
        case invalidCoordinates(x: Double, y: Double)
        /// Carries the rejected key input.
        case invalidKeyInput(String)
        /// Wraps the underlying error raised while fetching shareable content.
        case failedToGetContent(Error)
        case invalidWindowIndex
        case invalidApplicationIndex
        case invalidCaptureType
        case invalidConfiguration
        case serviceNotReady

        /// Human-readable description of the error.
        var errorDescription: String? {
            switch self {
            case .invalidServerURL:
                return "Invalid server URL for WebSocket connection"
            case .webSocketNotConnected:
                return "WebSocket connection not established"
            case let .windowNotFound(windowID):
                return "Window with ID \(windowID) not found"
            case .noDisplay:
                return "No display available"
            case .notCapturing:
                return "Screen capture is not active"
            case let .failedToStartCapture(underlying):
                return "Failed to start capture: \(underlying.localizedDescription)"
            case .failedToCreateEvent:
                return "Failed to create system event"
            case let .invalidCoordinates(xValue, yValue):
                return "Invalid coordinates: (\(xValue), \(yValue))"
            case let .invalidKeyInput(input):
                return "Invalid key input: \(input)"
            case let .failedToGetContent(underlying):
                return "Failed to get shareable content: \(underlying.localizedDescription)"
            case .invalidWindowIndex:
                return "Invalid window index"
            case .invalidApplicationIndex:
                return "Invalid application index"
            case .invalidCaptureType:
                return "Invalid capture type"
            case .invalidConfiguration:
                return "Invalid capture configuration"
            case .serviceNotReady:
                return "Screen capture service is not ready. Connection may still be initializing."
            }
        }
    }

    /// What a capture session records.
    enum CaptureMode {
        /// A single display selected by index; the index defaults to 0.
        case desktop(displayIndex: Int = 0)
        /// Every display at once. `startCapture` selects this for type "desktop" with index -1.
        case allDisplays
        /// One specific window.
        case window(SCWindow)
        /// One specific running application.
        case application(SCRunningApplication)
    }

    /// Codable description of one display, as produced by `getDisplays()`.
    struct DisplayInfo: Codable {
        /// Display identifier: the display's index as a string, or `"NSScreen-<index>"`
        /// when the entry was built from the `NSScreen` fallback.
        let id: String
        /// Display size.
        let width, height: Int
        /// Backing scale factor and refresh rate (frames per second).
        let scaleFactor, refreshRate: Double
        /// Origin of the display's frame.
        let x, y: Double
        /// Display name, when one is available.
        let name: String?
    }

    /// Codable description of one window, as produced by `getProcessGroups()`.
    struct WindowInfo: Codable {
        /// The window's ID (taken from `SCWindow.windowID`).
        let cgWindowID: Int
        /// Window title, when the window has one.
        let title: String?
        /// Origin and size of the window's frame.
        let x, y, width, height: Double
    }

    /// Codable description of one process and the windows it owns, as produced by
    /// `getProcessGroups()`.
    struct ProcessGroup: Codable {
        /// Name of the owning application.
        let processName: String
        /// Process identifier of the owning application.
        let pid: Int32
        /// `bundleIdentifier` is the application's bundle identifier;
        /// `iconData` is the application icon as a Base64 encoded PNG.
        let bundleIdentifier, iconData: String?
        /// The process's windows that passed filtering.
        let windows: [WindowInfo]
    }

    // MARK: - Initialization

    /// Creates the service and starts its background setup.
    ///
    /// Registers for display configuration changes, sets up the state machine callbacks, and
    /// launches an unstructured task that connects the WebSocket used for API handling. The
    /// initializer returns without waiting for that connection attempt to finish.
    override init() {
        super.init()
        logger.info("🚀 ScreencapService initialized, setting up WebSocket connection...")

        // Display configuration changes first, then state machine callbacks
        setupDisplayNotifications()
        setupStateMachine()

        // Connect to WebSocket for API handling as soon as the service exists
        Task {
            await self.setupWebSocketForAPIHandling()
        }
    }

    deinit {
        // Unregister this instance from every notification it observes (display notifications)
        let center = NotificationCenter.default
        center.removeObserver(self)
    }

    /// Setup WebSocket connection for handling API requests.
    ///
    /// Returns immediately when already connected. When another attempt is in flight, suspends
    /// until that attempt finishes and then returns without reporting its outcome. Otherwise it
    /// resolves the server URL, creates (or recreates) the `WebRTCManager`, connects it for API
    /// handling and drives the state machine accordingly.
    ///
    /// This method never throws. On failure it records the error in the state machine, fails
    /// every caller waiting in `webSocketConnectionContinuations`, and (except for an invalid
    /// server URL) schedules a reconnection. Callers that need to know the result must check
    /// `isWebSocketConnected` afterwards.
    private func setupWebSocketForAPIHandling() async {
        // Check if already connected or connecting
        if isWebSocketConnected {
            logger.debug("WebSocket already connected")
            return
        }

        if isWebSocketConnecting {
            logger.debug("WebSocket connection already in progress, waiting...")
            // Wait for existing connection attempt
            // `try?` discards a failure of that attempt; this caller simply returns either way.
            try? await withCheckedThrowingContinuation { continuation in
                webSocketConnectionContinuations.append(continuation)
            }
            return
        }

        // From here on this call owns the attempt; every exit path below must clear this flag
        // and resume the waiting continuations.
        isWebSocketConnecting = true

        // Transition to connecting state only if not already connected/capturing
        switch stateMachine.currentState {
        case .idle, .error:
            stateMachine.processEvent(.connect)
        case .capturing, .ready:
            // Already connected, this is a reconnection
            logger.info("🔄 Reconnecting WebSocket while in \(self.stateMachine.currentState) state")
        default:
            logger.warning("⚠️ Unexpected state when starting WebSocket connection: \(self.stateMachine.currentState)")
        }

        // Get server URL from environment or use default
        // VIBETUNNEL_SERVER_URL wins; otherwise localhost with the "serverPort" default (4020 if unset).
        let serverPort = UserDefaults.standard.string(forKey: "serverPort") ?? "4020"
        let serverURLString = ProcessInfo.processInfo
            .environment["VIBETUNNEL_SERVER_URL"] ?? "http://localhost:\(serverPort)"
        logger.info("📍 Using server URL: \(serverURLString)")
        guard let serverURL = URL(string: serverURLString) else {
            logger.error("Invalid server URL: \(serverURLString)")
            isWebSocketConnecting = false

            // Transition to error state
            stateMachine.processEvent(.connectionFailed(ScreencapError.invalidServerURL))

            // Fail all waiting continuations
            for continuation in webSocketConnectionContinuations {
                continuation.resume(throwing: ScreencapError.invalidServerURL)
            }
            webSocketConnectionContinuations.removeAll()
            // No reconnection is scheduled on this path.
            return
        }

        // Create WebRTC manager which handles WebSocket API requests
        if webRTCManager == nil {
            // Check if authentication is disabled
            let authMode = UserDefaults.standard.string(forKey: "authenticationMode") ?? "os"
            let isNoAuth = authMode == "none"

            if isNoAuth {
                // Authentication is disabled, create WebRTC manager without token
                logger.info("🔓 Authentication disabled, creating WebRTC manager without token")
                webRTCManager = WebRTCManager(serverURL: serverURL, screencapService: self, localAuthToken: nil)
            } else {
                // Get local auth token from ServerManager - this might be nil if server isn't started yet
                let localAuthToken = ServerManager.shared.bunServer?.localToken
                if localAuthToken == nil {
                    logger.warning("⚠️ No local auth token available yet - server might not be started")
                    logger.warning("⚠️ Will retry connection when auth token becomes available")
                    // Schedule a retry
                    // The retry task only starts its attempt after a delay, i.e. after the flag below is cleared.
                    scheduleReconnection()

                    // Transition to error state temporarily
                    stateMachine.processEvent(.connectionFailed(ScreencapError.webSocketNotConnected))
                    isWebSocketConnecting = false

                    // Fail waiting continuations
                    for continuation in webSocketConnectionContinuations {
                        continuation.resume(throwing: ScreencapError.webSocketNotConnected)
                    }
                    webSocketConnectionContinuations.removeAll()
                    return
                }
                webRTCManager = WebRTCManager(
                    serverURL: serverURL,
                    screencapService: self,
                    localAuthToken: localAuthToken
                )
            }
        } else if webRTCManager?.localAuthToken == nil {
            // A manager already exists but holds no token.
            // Check if authentication is disabled
            let authMode = UserDefaults.standard.string(forKey: "authenticationMode") ?? "os"
            let isNoAuth = authMode == "none"

            if !isNoAuth {
                // Update auth token if it wasn't available during initial creation
                // If the token is still unavailable, the existing token-less manager is kept and used below.
                let localAuthToken = ServerManager.shared.bunServer?.localToken
                if let localAuthToken {
                    logger.info("🔑 Updating WebRTC manager with newly available auth token")
                    // Recreate WebRTC manager with auth token
                    webRTCManager = WebRTCManager(
                        serverURL: serverURL,
                        screencapService: self,
                        localAuthToken: localAuthToken
                    )
                }
            }
        }

        // Connect to signaling server for API handling
        // This allows the browser to make API requests immediately
        do {
            // Ensure WebRTC manager exists
            guard let webRTCManager = self.webRTCManager else {
                logger.error("❌ WebRTC manager not available - cannot connect for API handling")
                throw ScreencapError.webSocketNotConnected
            }

            try await webRTCManager.connectForAPIHandling()
            logger.info("✅ Connected to WebSocket for screencap API handling")
            // Flags are updated before waiters are resumed so they observe the connected state.
            isWebSocketConnected = true
            isWebSocketConnecting = false

            // Transition to ready state - check current state
            switch stateMachine.currentState {
            case .error:
                stateMachine.processEvent(.errorRecovered)
            case .connecting:
                stateMachine.processEvent(.connectionEstablished)
            case .capturing, .ready:
                // Already in a good state, no transition needed
                logger.info("🔄 WebSocket reconnected while in \(self.stateMachine.currentState) state")
            default:
                logger.warning("⚠️ Unexpected state during WebSocket connection: \(self.stateMachine.currentState)")
            }

            // Resume all waiting continuations
            for continuation in webSocketConnectionContinuations {
                continuation.resume()
            }
            webSocketConnectionContinuations.removeAll()

            // Start monitoring connection
            startConnectionMonitor()
        } catch {
            logger.error("Failed to connect WebSocket for API: \(error)")
            isWebSocketConnecting = false
            isWebSocketConnected = false

            // Transition to error state
            stateMachine.processEvent(.connectionFailed(error))

            // Fail all waiting continuations
            for continuation in webSocketConnectionContinuations {
                continuation.resume(throwing: error)
            }
            webSocketConnectionContinuations.removeAll()

            // Schedule reconnection
            scheduleReconnection()
        }
    }

    /// Start monitoring the WebSocket connection.
    ///
    /// Replaces any previously running monitor. Every 5 seconds the monitor compares
    /// `webRTCManager.isConnected` with `isWebSocketConnected`; when the manager reports a
    /// dropped connection that this service still believes is up, the flag is cleared and a
    /// reconnection is scheduled.
    ///
    /// The loop ends when the task is cancelled, when `shouldReconnect` is false at the start of
    /// a pass, or when the service has been deallocated.
    private func startConnectionMonitor() {
        // Cancel any existing monitor so only one loop runs at a time
        reconnectTask?.cancel()

        reconnectTask = Task { [weak self] in
            while !Task.isCancelled {
                // Resolve the weak reference on every pass rather than once up front: a strong
                // reference held for the whole loop would keep the service alive for as long
                // as the task runs.
                guard self?.shouldReconnect == true else { return }

                do {
                    try await Task.sleep(nanoseconds: 5_000_000_000) // 5 seconds
                } catch {
                    // The sleep was interrupted by cancellation; a cancelled monitor must not
                    // run another check or schedule a reconnection.
                    return
                }

                guard let service = self else { return }

                // Check if still connected
                if let manager = service.webRTCManager,
                   !manager.isConnected,
                   service.isWebSocketConnected
                {
                    service.logger.warning("⚠️ WebSocket disconnected, marking as disconnected")
                    service.isWebSocketConnected = false
                    service.scheduleReconnection()
                }
            }
        }
    }

    /// Schedule a single delayed reconnection attempt.
    ///
    /// Does nothing when `shouldReconnect` is false. Otherwise starts a task that waits
    /// 2 seconds and then, if the socket is still down and reconnection is still wanted,
    /// runs `setupWebSocketForAPIHandling()` again. The delay is fixed; there is no backoff.
    private func scheduleReconnection() {
        if !shouldReconnect { return }

        Task { [weak self] in
            guard let service = self else { return }

            // Wait before reconnecting (exponential backoff could be added here)
            service.logger.info("⏳ Scheduling reconnection in 2 seconds...")
            try? await Task.sleep(nanoseconds: 2_000_000_000) // 2 seconds

            // Skip the attempt if the socket came back, or reconnection was disabled, meanwhile
            guard !service.isWebSocketConnected, service.shouldReconnect else { return }

            service.logger.info("🔄 Attempting to reconnect WebSocket...")
            await service.setupWebSocketForAPIHandling()
        }
    }

    // MARK: - Public Methods

    /// Handle a WebSocket disconnection notification.
    ///
    /// Marks the connection as down and schedules a reconnection attempt.
    public func handleWebSocketDisconnection() async {
        isWebSocketConnected = false
        logger.warning("⚠️ WebSocket disconnected, will attempt to reconnect")
        scheduleReconnection()
    }

    /// Ensure the WebSocket connection is established.
    ///
    /// Starts a connection attempt when none is active, or waits for the attempt that is
    /// already in flight.
    ///
    /// - Throws: The error of the in-flight attempt this call waited on, or
    ///   `ScreencapError.webSocketNotConnected` when the socket is still not connected afterwards.
    public func ensureWebSocketConnected() async throws {
        let isIdle = !(isWebSocketConnected || isWebSocketConnecting)
        if isIdle {
            await setupWebSocketForAPIHandling()
        }

        // An attempt is still running: suspend until it is resolved
        if !isWebSocketConnected, isWebSocketConnecting {
            try await withCheckedThrowingContinuation { (waiter: CheckedContinuation<Void, Error>) in
                webSocketConnectionContinuations.append(waiter)
            }
        }

        // Verify we're actually connected now
        if !isWebSocketConnected {
            throw ScreencapError.webSocketNotConnected
        }
    }

    /// Diagnostic helper for debugging `SCShareableContent` issues.
    ///
    /// Logs what `NSScreen` reports, then queries `SCShareableContent` in three different ways
    /// and logs the display and window counts (or the error) for each query. Nothing is returned
    /// or thrown; a failing query is logged and the remaining ones still run.
    func testShareableContent() async {
        logger.info("🧪 Testing SCShareableContent...")

        /// Logs the display and window counts of one query result.
        func logCounts(of shareable: SCShareableContent) {
            logger.info("  - Displays: \(shareable.displays.count)")
            logger.info("  - Windows: \(shareable.windows.count)")
        }

        // Test 1: Check NSScreen
        logger.info("🧪 Test 1: NSScreen.screens")
        let attachedScreens = NSScreen.screens
        logger.info("  - Count: \(attachedScreens.count)")
        for (position, attached) in attachedScreens.enumerated() {
            logger.info("  - Screen \(position): \(attached.localizedName), frame: \(String(describing: attached.frame))")
        }

        // Test 2: Try SCShareableContent.current
        logger.info("🧪 Test 2: SCShareableContent.current")
        do {
            let current = try await SCShareableContent.current
            logCounts(of: current)
            for (position, display) in current.displays.enumerated() {
                logger
                    .info(
                        "  - Display \(position): frame=\(String(describing: display.frame)), size=\(display.width)x\(display.height)"
                    )
            }
        } catch {
            logger.error("  - Failed: \(error)")
        }

        // Test 3: Try excludingDesktopWindows with different parameters
        logger.info("🧪 Test 3: SCShareableContent.excludingDesktopWindows(false, false)")
        do {
            let everything = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)
            logCounts(of: everything)
        } catch {
            logger.error("  - Failed: \(error)")
        }

        // Test 4: Try excludingDesktopWindows with true, true
        logger.info("🧪 Test 4: SCShareableContent.excludingDesktopWindows(true, true)")
        do {
            let onScreenOnly = try await SCShareableContent.excludingDesktopWindows(true, onScreenWindowsOnly: true)
            logCounts(of: onScreenOnly)
        } catch {
            logger.error("  - Failed: \(error)")
        }
    }

    /// Get all available displays.
    ///
    /// Displays are read from `SCShareableContent`. Each one is paired with an `NSScreen`
    /// (first by frame, then by size alone) to obtain a name, backing scale factor and refresh
    /// rate. If `SCShareableContent` reports no displays, the list is built from `NSScreen`
    /// instead, using ids of the form `"NSScreen-<index>"`.
    ///
    /// - Returns: One `DisplayInfo` per display, in the order reported by the source used.
    /// - Throws: The error raised by the `SCShareableContent` query, or
    ///   `ScreencapError.noDisplay` when neither source reports a display.
    func getDisplays() async throws -> [DisplayInfo] {
        logger.info("🔍 getDisplays() called")

        // First check NSScreen to see what the system reports
        let initialScreens = NSScreen.screens
        logger.info("🖥️ NSScreen.screens count: \(initialScreens.count)")
        for (index, screen) in initialScreens.enumerated() {
            logger.info("🖥️ NSScreen \(index): \(screen.localizedName), frame: \(String(describing: screen.frame))")
        }

        // Use SCShareableContent to ensure consistency with capture
        logger.info("🔍 Calling SCShareableContent.excludingDesktopWindows...")
        let content: SCShareableContent
        do {
            content = try await SCShareableContent.excludingDesktopWindows(
                false,
                onScreenWindowsOnly: false
            )
            logger.info("✅ SCShareableContent returned successfully")
            logger.info("📺 SCShareableContent displays count: \(content.displays.count)")
            logger.info("🪟 SCShareableContent windows count: \(content.windows.count)")
        } catch {
            logger.error("❌ SCShareableContent.excludingDesktopWindows failed: \(error)")
            throw error
        }

        // Read the screen list again after the await and use this one snapshot for everything
        // below, so the fallback and the matching all work from the same list.
        let screens = NSScreen.screens

        guard !content.displays.isEmpty else {
            logger.error("❌ No displays found in SCShareableContent, trying NSScreen fallback")

            // Fallback to NSScreen when SCShareableContent fails
            guard !screens.isEmpty else {
                logger.error("❌ No displays found in NSScreen either")
                throw ScreencapError.noDisplay
            }

            logger.warning("⚠️ Using NSScreen fallback - found \(screens.count) displays")

            // Create DisplayInfo from NSScreen data
            return screens.enumerated().map { index, screen in
                DisplayInfo(
                    id: "NSScreen-\(index)",
                    width: Int(screen.frame.width),
                    height: Int(screen.frame.height),
                    scaleFactor: Double(screen.backingScaleFactor),
                    // Same source as the main path below, instead of a hard-coded 60 Hz
                    refreshRate: Double(screen.maximumFramesPerSecond),
                    x: Double(screen.frame.origin.x),
                    y: Double(screen.frame.origin.y),
                    name: screen.localizedName
                )
            }
        }

        logger.info("📺 Found \(content.displays.count) displays")

        // Log all NSScreen frames for comparison (once, not once per display)
        for (screenIndex, screen) in screens.enumerated() {
            let screenName = screen.localizedName
            logger.debug("🖥️ NSScreen \(screenIndex): frame=\(String(describing: screen.frame)), name=\(screenName)")
        }

        // Two frames are treated as equal when every compared component differs by less than this
        let tolerance: CGFloat = 1.0

        var displayInfos: [DisplayInfo] = []
        displayInfos.reserveCapacity(content.displays.count)

        for (index, display) in content.displays.enumerated() {
            // Log display details for debugging
            logger
                .debug(
                    "📺 SCDisplay \(index): frame=\(String(describing: display.frame)), width=\(display.width), height=\(display.height)"
                )

            // Try to find corresponding NSScreen for additional info
            // First attempt: match on origin and size
            var nsScreen = screens.first { screen in
                let matches = abs(screen.frame.origin.x - display.frame.origin.x) < tolerance
                    && abs(screen.frame.origin.y - display.frame.origin.y) < tolerance
                    && abs(screen.frame.width - display.frame.width) < tolerance
                    && abs(screen.frame.height - display.frame.height) < tolerance
                if matches {
                    let screenName = screen.localizedName
                    logger.debug("✅ Matched SCDisplay \(index) with NSScreen: \(screenName)")
                }
                return matches
            }

            // If no match found, try matching by size only (position might be different)
            if nsScreen == nil {
                nsScreen = screens.first { screen in
                    let matches = abs(screen.frame.width - display.frame.width) < tolerance
                        && abs(screen.frame.height - display.frame.height) < tolerance
                    if matches {
                        let screenName = screen.localizedName
                        logger.debug("✅ Matched SCDisplay \(index) with NSScreen by size: \(screenName)")
                    }
                    return matches
                }
            }

            // Without a matching NSScreen, fall back to a generated name, 2x scale and 60 Hz
            let name = nsScreen?.localizedName ?? "Display \(index + 1)"
            logger.info("📍 Display \(index): '\(name)' - size: \(display.width)x\(display.height)")

            displayInfos.append(
                DisplayInfo(
                    id: "\(index)",
                    width: Int(display.width),
                    height: Int(display.height),
                    scaleFactor: Double(nsScreen?.backingScaleFactor ?? 2.0),
                    refreshRate: Double(nsScreen?.maximumFramesPerSecond ?? 60),
                    x: display.frame.origin.x,
                    y: display.frame.origin.y,
                    name: name
                )
            )
        }

        return displayInfos
    }

    /// Get current display information (for backward compatibility).
    ///
    /// - Returns: The first entry of `getDisplays()`.
    /// - Throws: Whatever `getDisplays()` throws, or `ScreencapError.noDisplay` when the list is empty.
    func getDisplayInfo() async throws -> DisplayInfo {
        if let firstDisplay = try await getDisplays().first {
            return firstDisplay
        }
        throw ScreencapError.noDisplay
    }

    /// Get process groups with their windows.
    ///
    /// Queries shareable content (falling back to `SCShareableContent.current` when the first
    /// query fails) and drops windows that are off screen, smaller than 100x100, owned by
    /// system UI processes or by VibeTunnel itself, or that carry a known helper title. The
    /// remaining windows are grouped by owning process.
    ///
    /// - Returns: Process groups ordered by their largest window area, descending. Groups with
    ///   equal areas are ordered by ascending PID so the result is stable between calls.
    /// - Throws: `ScreencapError.failedToGetContent` when both content queries fail.
    func getProcessGroups() async throws -> [ProcessGroup] {
        logger.info("🔍 getProcessGroups called")

        // First check screen recording permission (the result is only logged)
        let hasPermission = await isScreenRecordingAllowed()
        logger.info("🔍 Screen recording permission check: \(hasPermission)")

        // Record the total elapsed time to help diagnose slow SCShareableContent queries.
        // No timeout is enforced.
        let startTime = Date()
        defer {
            let elapsed = Date().timeIntervalSince(startTime)
            logger.info("🔍 getProcessGroups completed in \(elapsed) seconds")
        }

        logger.info("🔍 About to call SCShareableContent.excludingDesktopWindows")
        logger.info("🔍 Current thread: \(Thread.current)")
        logger.info("🔍 Main thread: \(Thread.isMainThread)")

        // Try to get shareable content with better error handling
        let content: SCShareableContent
        do {
            // Simple direct call with better error handling
            logger.info("🔍 Calling SCShareableContent.excludingDesktopWindows directly...")
            content = try await SCShareableContent.excludingDesktopWindows(
                false,
                onScreenWindowsOnly: false
            )
            logger.info("🔍 Got shareable content with \(content.windows.count) windows")
        } catch {
            logger.error("❌ Failed to get shareable content: \(error)")
            logger.error("❌ Error type: \(type(of: error))")
            logger.error("❌ Error description: \(error.localizedDescription)")

            // Every Swift error bridges to NSError, so no conditional cast is needed
            let nsError = error as NSError
            logger.error("❌ Error domain: \(nsError.domain)")
            logger.error("❌ Error code: \(nsError.code)")
            logger.error("❌ Error userInfo: \(nsError.userInfo)")

            // Try alternative method
            logger.info("🔍 Trying SCShareableContent.current as fallback...")
            do {
                content = try await SCShareableContent.current
                logger.info("🔍 Got shareable content via .current with \(content.windows.count) windows")
            } catch {
                logger.error("❌ Fallback also failed: \(error)")
                throw ScreencapError.failedToGetContent(error)
            }
        }

        // Lookup tables for the filter, built once rather than once per window
        let systemApps: Set<String> = [
            "Window Server",
            "WindowManager",
            "Dock",
            "SystemUIServer",
            "Control Center",
            "Notification Center",
            "Spotlight",
            "AXUIElement", // Accessibility UI elements
            "Desktop" // Filter out Desktop entries
        ]
        // Titles rejected on an exact match; the empty string rejects windows with empty titles
        let rejectedTitles: Set<String> = ["", "Focus Proxy", "Menu Bar", "Desktop"]
        // Titles rejected when they contain any of these fragments
        let rejectedTitleFragments = ["Event Tap", "Shield"]

        // Filter windows first
        let filteredWindows = content.windows.filter { window in
            // Skip windows that are not on screen
            guard window.isOnScreen else { return false }

            // Skip windows with zero size
            guard window.frame.width > 0 && window.frame.height > 0 else { return false }

            // Skip very small windows (less than 100x100 pixels)
            // These are often invisible utility windows or focus proxies
            guard window.frame.width >= 100 && window.frame.height >= 100 else {
                logger
                    .debug(
                        "Filtering out small window: \(window.title ?? "Untitled") - size: \(window.frame.width)x\(window.frame.height)"
                    )
                return false
            }

            if let appName = window.owningApplication?.applicationName {
                // Skip system windows
                if systemApps.contains(appName) {
                    return false
                }

                // Skip VibeTunnel itself
                if appName.lowercased().contains("vibetunnel") {
                    return false
                }
            }

            // Skip windows with certain titles (windows without any title are kept)
            if let title = window.title {
                if rejectedTitles.contains(title) ||
                    rejectedTitleFragments.contains(where: { title.contains($0) })
                {
                    return false
                }
            }

            return true
        }

        logger.info("🔍 Filtered to \(filteredWindows.count) windows")

        // Group windows by process
        let groupedWindows = Dictionary(grouping: filteredWindows) { window in
            window.owningApplication?.processID ?? 0
        }

        logger.info("🔍 Grouped into \(groupedWindows.count) process groups")

        // Convert to ProcessGroups. Groups whose windows have no owning application are dropped.
        // Icons come from the per-PID cache, so each process icon is rendered at most once.
        let processGroups = groupedWindows.compactMap { _, windows -> ProcessGroup? in
            guard let firstWindow = windows.first,
                  let app = firstWindow.owningApplication else { return nil }

            let windowInfos = windows.map { window in
                WindowInfo(
                    cgWindowID: Int(window.windowID),
                    title: window.title,
                    x: window.frame.origin.x,
                    y: window.frame.origin.y,
                    width: window.frame.width,
                    height: window.frame.height
                )
            }

            return ProcessGroup(
                processName: app.applicationName,
                pid: app.processID,
                bundleIdentifier: app.bundleIdentifier,
                iconData: getCachedAppIcon(for: app.processID),
                windows: windowInfos
            )
        }

        // Sort by largest window area (descending) - processes with bigger windows appear first.
        // The area is computed once per group instead of on every comparison.
        let ranked = processGroups.map { group in
            (group: group, largestArea: group.windows.map { $0.width * $0.height }.max() ?? 0)
        }

        return ranked
            .sorted { lhs, rhs in
                if lhs.largestArea != rhs.largestArea {
                    return lhs.largestArea > rhs.largestArea
                }
                // Dictionary iteration order is arbitrary; break ties by PID for a stable order
                return lhs.group.pid < rhs.group.pid
            }
            .map { $0.group }
    }

    /// Check if screen recording permission is granted.
    ///
    /// Probes by requesting `SCShareableContent.current`; any error from that request is
    /// reported as "not allowed".
    ///
    /// - Returns: `true` when the request succeeds, `false` when it throws.
    private func isScreenRecordingAllowed() async -> Bool {
        // Use ScreenCaptureKit to check permission instead of deprecated CGDisplayCreateImage
        let isAllowed: Bool
        do {
            // Try to get shareable content - this will fail if no permission
            _ = try await SCShareableContent.current
            isAllowed = true
            logger.info("✅ Screen recording permission is granted")
        } catch {
            isAllowed = false
            logger.warning("❌ Screen recording permission check failed: \(error)")
        }
        return isAllowed
    }

    /// Get cached application icon or load it if not cached.
    ///
    /// Failed lookups are cached too: once `getAppIcon(for:)` has returned `nil` for a PID,
    /// later calls return `nil` without trying again.
    ///
    /// - Parameter pid: Process identifier of the application.
    /// - Returns: The base64 icon, or `nil` when none could be produced.
    private func getCachedAppIcon(for pid: Int32) -> String? {
        // A cache hit unwraps to `String?`; a cached `nil` is still a hit
        guard let cached = iconCache[pid] else {
            // Miss: load the icon and remember the outcome, including `nil`
            let loaded = getAppIcon(for: pid)
            iconCache.updateValue(loaded, forKey: pid)
            return loaded
        }
        return cached
    }

    /// Get application icon as base64 encoded PNG.
    ///
    /// The time taken is logged on every exit path.
    ///
    /// - Parameter pid: Process identifier of the application.
    /// - Returns: The base64 string, or `nil` when no running application or icon is found
    ///   for the PID, or when PNG conversion fails.
    private func getAppIcon(for pid: Int32) -> String? {
        let began = Date()
        defer {
            let elapsed = Date().timeIntervalSince(began)
            logger.info("⏱️ getAppIcon for PID \(pid) took \(elapsed) seconds")
        }

        guard let sourceIcon = NSRunningApplication(processIdentifier: pid)?.icon else {
            logger.info("⚠️ No icon found for PID \(pid)")
            return nil
        }

        // Resize icon to 32x32 by drawing it into a new image of that size
        let iconSize = NSSize(width: 32, height: 32)
        let destinationRect = NSRect(origin: .zero, size: iconSize)
        let sourceRect = NSRect(origin: .zero, size: sourceIcon.size)
        let scaledIcon = NSImage(size: iconSize)

        scaledIcon.lockFocus()
        NSGraphicsContext.current?.imageInterpolation = .high
        sourceIcon.draw(in: destinationRect, from: sourceRect, operation: .copy, fraction: 1.0)
        scaledIcon.unlockFocus()

        // Convert to PNG via the image's TIFF representation
        guard let tiff = scaledIcon.tiffRepresentation,
              let bitmapRep = NSBitmapImageRep(data: tiff),
              let png = bitmapRep.representation(using: .png, properties: [:])
        else {
            logger.error("❌ Failed to convert icon to PNG for PID \(pid)")
            return nil
        }

        return png.base64EncodedString()
    }

    /// Start capture with specified mode
    ///
    /// Stops any running capture, makes sure the WebSocket is connected, resolves the
    /// requested target into a content filter and stream configuration, and starts the stream.
    ///
    /// - Parameters:
    ///   - type: `"desktop"`, `"window"` or `"application"`; any other value throws `invalidCaptureType`.
    ///   - index: Index into the shareable content's displays, windows or applications.
    ///     For `"desktop"`, `-1` selects all displays and any other out-of-range value
    ///     falls back to display 0.
    ///   - useWebRTC: When `true`, WebRTC streaming is started once the stream is running.
    ///   - use8k: Forwarded to the WebRTC setup as the quality selector.
    /// - Throws: `ScreencapError` for an unavailable state, unknown type, invalid index,
    ///   missing display/window, or a stream that fails to start. Errors from
    ///   `ensureWebSocketConnected()` are rethrown unchanged.
    func startCapture(type: String, index: Int, useWebRTC: Bool = false, use8k: Bool = false) async throws {
        logger.info("🎬 Starting capture - type: \(type), index: \(index), WebRTC: \(useWebRTC), 8K: \(use8k)")

        // Check screen recording permission first (diagnostic only; a missing permission
        // surfaces as an error when the shareable content is requested below).
        let hasPermission = await isScreenRecordingAllowed()
        logger.info("🔒 Screen recording permission: \(hasPermission)")
        if !hasPermission {
            logger.error("❌ No screen recording permission!")
            logger.error("💡 Please grant Screen Recording permission in:")
            logger.error("   System Settings > Privacy & Security > Screen Recording > VibeTunnel")
        }

        // Stop any existing capture first to ensure clean state
        await stopCapture()

        // Ensure WebSocket is connected first
        try await ensureWebSocketConnected()

        // Check if we can start capture
        guard stateMachine.canPerformAction(.startCapture) else {
            logger.error("Cannot start capture in state: \(self.stateMachine.currentState)")
            throw ScreencapError.serviceNotReady
        }

        self.useWebRTC = useWebRTC

        // Provisional mode reported to the state machine. It is deliberately NOT named
        // `captureMode`: a local with that name shadows the property and makes the stream
        // configuration read this placeholder instead of the resolved mode.
        let requestedMode: CaptureMode = switch type {
        case "desktop":
            if index == -1 {
                .allDisplays
            } else {
                .desktop(displayIndex: index)
            }
        case "window":
            // For window capture, we'll need to select the window later
            // Use desktop mode as a placeholder until window is selected
            .desktop(displayIndex: 0)
        default:
            .desktop(displayIndex: 0)
        }

        // Transition to starting state
        stateMachine.processEvent(.startCapture(mode: requestedMode, useWebRTC: useWebRTC))

        let filter: SCContentFilter
        let streamConfig: SCStreamConfiguration
        do {
            logger.debug("Requesting shareable content...")
            let content: SCShareableContent
            do {
                content = try await SCShareableContent.current
                logger
                    .info(
                        "Got shareable content - displays: \(content.displays.count), windows: \(content.windows.count), apps: \(content.applications.count)"
                    )
            } catch {
                logger.error("Failed to get shareable content: \(error)")
                throw ScreencapError.failedToGetContent(error)
            }

            let target = try resolveCaptureTarget(type: type, index: index, content: content)
            filter = target.filter
            streamConfig = buildStreamConfiguration(display: target.display, content: content)
        } catch {
            // The state machine was already moved to "starting"; report the failure the
            // same way the stream-start failure path below does so it is not left there.
            stateMachine.processEvent(.captureFailure(error))
            throw error
        }

        logger.info("Stream config - size: \(streamConfig.width)x\(streamConfig.height), fps: 30")

        // Create and start stream
        let stream = SCStream(filter: filter, configuration: streamConfig, delegate: self)
        captureStream = stream

        // Add output and start capture
        do {
            // Add output with dedicated queue for optimal performance
            try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: sampleHandlerQueue)

            // Log stream output configuration
            logger.info("Added stream output handler for type: .screen")

            try await stream.startCapture()

            isCapturing = true
            logger.info("✅ Successfully started \(type) capture")
            logger.info("📺 Stream is now active and should be producing frames")

            // Transition to capturing state
            stateMachine.processEvent(.captureStarted)

            // Start WebRTC if enabled
            if useWebRTC {
                logger.info("🌐 Starting WebRTC capture...")
                await startWebRTCCapture(use8k: use8k)
            } else {
                logger.info("🖼️ Using JPEG mode (WebRTC disabled)")
            }
        } catch {
            logger.error("Failed to start capture: \(error)")
            captureStream = nil

            // Transition to error state
            stateMachine.processEvent(.captureFailure(error))

            throw ScreencapError.failedToStartCapture(error)
        }
    }

    /// Resolves `type`/`index` against `content`, storing the resolved `captureMode`,
    /// `captureFilter` (and `selectedWindow` / `currentDisplayIndex` where applicable).
    ///
    /// - Returns: The content filter and the display the filter is attached to.
    /// - Throws: `ScreencapError` when the type is unknown, the index is out of range
    ///   (negative indices included), or no suitable display/window exists.
    private func resolveCaptureTarget(
        type: String,
        index: Int,
        content: SCShareableContent
    ) throws -> (filter: SCContentFilter, display: SCDisplay) {
        let filter: SCContentFilter
        let targetDisplay: SCDisplay

        switch type {
        case "desktop":
            // Check if index is -1 which means all displays
            if index == -1 {
                // Capture all displays
                guard let primaryDisplay = content.displays.first else {
                    throw ScreencapError.noDisplay
                }

                captureMode = .allDisplays
                currentDisplayIndex = -1

                logger.info("🖥️ Setting up all displays capture mode")
                logger.info("  Primary display: size=\(primaryDisplay.width)x\(primaryDisplay.height)")
                logger.info("  Total displays: \(content.displays.count)")

                // For all displays, capture everything including menu bar
                logger.info("🔍 Creating content filter for all displays including menu bar")

                // Create filter that includes the entire display content.
                filter = SCContentFilter(display: primaryDisplay, excludingWindows: [])
                targetDisplay = primaryDisplay

                logger.info("✅ Created content filter for all displays capture including system UI")
            } else {
                // Single display capture. Any out-of-range index (negative ones too, which
                // would otherwise trap on subscripting) falls back to display 0.
                let displayIndex = content.displays.indices.contains(index) ? index : 0
                guard content.displays.indices.contains(displayIndex) else {
                    throw ScreencapError.noDisplay
                }
                let display = content.displays[displayIndex]
                captureMode = .desktop(displayIndex: displayIndex)
                currentDisplayIndex = displayIndex

                // Log display selection for debugging
                logger
                    .info(
                        "📺 Capturing display \(displayIndex) of \(content.displays.count) - size: \(display.width)x\(display.height)"
                    )

                // Create filter to capture entire display including menu bar
                filter = SCContentFilter(display: display, excludingWindows: [])
                targetDisplay = display
            }

        case "window":
            // `indices.contains` also rejects negative indices.
            guard content.windows.indices.contains(index) else {
                throw ScreencapError.invalidWindowIndex
            }
            let window = content.windows[index]
            selectedWindow = window
            captureMode = .window(window)

            logger
                .info(
                    "🪟 Capturing window: '\(window.title ?? "Untitled")' - size: \(window.frame.width)x\(window.frame.height)"
                )

            // For window capture, we need to find which display contains this window
            let windowDisplay = content.displays.first { display in
                // Check if window's frame intersects with display's frame
                display.frame.intersects(window.frame)
            } ?? content.displays.first

            guard let display = windowDisplay else {
                throw ScreencapError.noDisplay
            }

            // Create a filter that includes just the single window on its display.
            // This is the most reliable way to capture a single window.
            filter = SCContentFilter(display: display, including: [window])
            targetDisplay = display

        case "application":
            // `indices.contains` also rejects negative indices.
            guard content.applications.indices.contains(index) else {
                throw ScreencapError.invalidApplicationIndex
            }
            let app = content.applications[index]
            captureMode = .application(app)

            // Get all windows for this application
            let appWindows = content.windows.filter { window in
                window.owningApplication?.processID == app.processID && window.isOnScreen && window.frame
                    .width > 1 && window.frame.height > 1
            }

            // Determine which display to use. Find the display that contains the largest window of the app.
            guard let largestWindow = appWindows.max(by: {
                $0.frame.width * $0.frame.height < $1.frame.width * $1.frame.height
            }) else {
                logger.warning("No capturable windows found for application: \(app.applicationName)")
                throw ScreencapError.windowNotFound(0)
            }

            guard let display = content.displays.first(where: { $0.frame.intersects(largestWindow.frame) }) else {
                throw ScreencapError.noDisplay
            }

            // Create a filter that includes all windows of the application on the chosen display.
            filter = SCContentFilter(display: display, including: appWindows)
            targetDisplay = display
            logger
                .info(
                    "Capturing application \(app.applicationName) with \(appWindows.count) windows on display \(display.displayID)"
                )

        default:
            throw ScreencapError.invalidCaptureType
        }

        captureFilter = filter
        return (filter, targetDisplay)
    }

    /// Builds the stream configuration for the resolved `captureMode`.
    ///
    /// - Parameters:
    ///   - targetDisplay: Display the content filter is attached to; used for desktop
    ///     sizing and to express window/application rects relative to that display.
    ///   - content: Shareable content the mode was resolved against.
    private func buildStreamConfiguration(
        display targetDisplay: SCDisplay,
        content: SCShareableContent
    ) -> SCStreamConfiguration {
        let streamConfig = SCStreamConfiguration()
        let displayOrigin = targetDisplay.frame.origin

        switch captureMode {
        case .allDisplays:
            // Calculate the bounding rectangle that encompasses all displays
            logger.info("🖥️ Calculating bounds for \(content.displays.count) displays:")
            var bounds = CGRect.null
            for (index, display) in content.displays.enumerated() {
                logger
                    .info(
                        "  Display \(index): origin=(\(display.frame.origin.x), \(display.frame.origin.y)), size=\(display.frame.width)x\(display.frame.height)"
                    )
                bounds = bounds.union(display.frame)
            }

            let minX = bounds.minX
            let minY = bounds.minY
            let totalWidth = bounds.width
            let totalHeight = bounds.height

            logger.info("📐 Combined display bounds: origin=(\(minX), \(minY)), size=\(totalWidth)x\(totalHeight)")

            streamConfig.width = Int(totalWidth)
            streamConfig.height = Int(totalHeight)
            streamConfig.sourceRect = CGRect(x: minX, y: minY, width: totalWidth, height: totalHeight)
            streamConfig.destinationRect = CGRect(x: 0, y: 0, width: totalWidth, height: totalHeight)

            logger
                .info(
                    "📐 Stream config: sourceRect = (\(minX), \(minY), \(totalWidth), \(totalHeight)), destinationRect = (0, 0, \(totalWidth), \(totalHeight))"
                )

        case .window(let window):
            // For window capture, use the window's bounds
            // Note: The window frame might need to be scaled for Retina displays
            let scaleFactor = NSScreen.main?.backingScaleFactor ?? 2.0
            streamConfig.width = Int(window.frame.width * scaleFactor)
            streamConfig.height = Int(window.frame.height * scaleFactor)
            // The filter is display-based, so crop the display to the window's area;
            // the rect is expressed relative to the display's origin.
            // NOTE(review): assumes sourceRect is display-relative in points — confirm on a secondary display.
            streamConfig.sourceRect = window.frame.offsetBy(dx: -displayOrigin.x, dy: -displayOrigin.y)
            logger
                .info(
                    "🪟 Window stream config - size: \(streamConfig.width)x\(streamConfig.height) (scale: \(scaleFactor))"
                )

        case .desktop:
            // For desktop capture, use the display dimensions and set proper rects
            streamConfig.width = Int(targetDisplay.width)
            streamConfig.height = Int(targetDisplay.height)

            // Set source rect to capture the entire display including menu bar and dock
            streamConfig.sourceRect = CGRect(x: 0, y: 0, width: targetDisplay.width, height: targetDisplay.height)
            streamConfig.destinationRect = CGRect(
                x: 0,
                y: 0,
                width: targetDisplay.width,
                height: targetDisplay.height
            )

            let sourceRectStr = String(describing: streamConfig.sourceRect)
            let destRectStr = String(describing: streamConfig.destinationRect)
            logger
                .info(
                    "🖥️ Desktop stream config - display: \(streamConfig.width)x\(streamConfig.height), sourceRect: \(sourceRectStr), destRect: \(destRectStr)"
                )

        case .application(let app):
            // For application capture, calculate the bounding box of all its windows.
            let unionRect = content.windows
                .filter { $0.owningApplication?.processID == app.processID && $0.isOnScreen }
                .reduce(CGRect.null) { $0.union($1.frame) }

            if unionRect.isNull {
                // Fallback if no windows are found, though we've checked this already.
                streamConfig.width = 1
                streamConfig.height = 1
            } else {
                // Set the stream to capture the exact bounding box of the application's
                // windows, expressed relative to the display's origin.
                streamConfig.sourceRect = unionRect.offsetBy(dx: -displayOrigin.x, dy: -displayOrigin.y)
                streamConfig.width = Int(unionRect.width)
                streamConfig.height = Int(unionRect.height)
                logger
                    .info(
                        "App capture rect: origin=(\(unionRect.origin.x), \(unionRect.origin.y)), size=(\(unionRect.width)x\(unionRect.height))"
                    )
            }
        }

        // Basic configuration
        streamConfig.minimumFrameInterval = CMTime(value: 1, timescale: 30) // 30 FPS
        streamConfig.queueDepth = 5
        streamConfig.showsCursor = true
        streamConfig.capturesAudio = false

        // CRITICAL: Set pixel format to get raw frames
        streamConfig.pixelFormat = kCVPixelFormatType_32BGRA

        // Configure scaling behavior
        if case .allDisplays = captureMode {
            // For all displays, we want to capture the full virtual desktop
            streamConfig.scalesToFit = true
            streamConfig.preservesAspectRatio = true
            logger.info("📐 All displays mode: scalesToFit=true, preservesAspectRatio=true")
        } else {
            // No scaling for single display/window
            streamConfig.scalesToFit = false
        }

        // Color space
        streamConfig.colorSpaceName = CGColorSpace.sRGB

        return streamConfig
    }

    /// Start capture for a specific window by its cgWindowID
    ///
    /// - Parameters:
    ///   - cgWindowID: Window identifier to look up in the current shareable content.
    ///     Values that do not fit a `CGWindowID` are treated as "window not found".
    ///   - useWebRTC: When `true`, WebRTC streaming is started once the stream is running.
    ///   - use8k: Forwarded to the WebRTC setup as the quality selector.
    /// - Throws: `ScreencapError.failedToGetContent`, `.invalidWindowIndex` when no window
    ///   matches, or `.failedToStartCapture` when the stream cannot be started.
    func startCaptureWindow(cgWindowID: Int, useWebRTC: Bool = false, use8k: Bool = false) async throws {
        logger.info("Starting window capture - cgWindowID: \(cgWindowID), WebRTC: \(useWebRTC), 8K: \(use8k)")

        self.useWebRTC = useWebRTC

        // Stop any existing capture
        await stopCapture()

        logger.debug("Requesting shareable content...")
        let content: SCShareableContent
        do {
            content = try await SCShareableContent.current
            logger
                .info(
                    "Got shareable content - displays: \(content.displays.count), windows: \(content.windows.count), apps: \(content.applications.count)"
                )
        } catch {
            logger.error("Failed to get shareable content: \(error)")
            throw ScreencapError.failedToGetContent(error)
        }

        // Find the window by cgWindowID. `CGWindowID(exactly:)` is used because the plain
        // converting initializer traps on negative or oversized values, which would crash
        // the app on a bad request instead of reporting "not found".
        guard let windowID = CGWindowID(exactly: cgWindowID),
              let window = content.windows.first(where: { $0.windowID == windowID })
        else {
            logger.error("Window with cgWindowID \(cgWindowID) not found")
            throw ScreencapError.invalidWindowIndex
        }

        selectedWindow = window
        self.captureMode = .window(window)

        logger
            .info(
                "🪟 Capturing window: '\(window.title ?? "Untitled")' - size: \(window.frame.width)x\(window.frame.height)"
            )

        // Create filter for single window - use a simpler approach
        logger.info("📱 Creating filter for window on display")

        // Create a filter with just the single window
        let filter = SCContentFilter(desktopIndependentWindow: window)
        captureFilter = filter

        // Configure stream
        let streamConfig = SCStreamConfiguration()

        // For window capture, use the window's bounds
        // Note: The window frame might need to be scaled for Retina displays
        let scaleFactor = NSScreen.main?.backingScaleFactor ?? 2.0
        streamConfig.width = Int(window.frame.width * scaleFactor)
        streamConfig.height = Int(window.frame.height * scaleFactor)
        logger
            .info("🪟 Window stream config - size: \(streamConfig.width)x\(streamConfig.height) (scale: \(scaleFactor))")

        // Basic configuration
        streamConfig.minimumFrameInterval = CMTime(value: 1, timescale: 30) // 30 FPS
        streamConfig.queueDepth = 5
        streamConfig.showsCursor = true
        streamConfig.capturesAudio = false

        // CRITICAL: Set pixel format to get raw frames
        streamConfig.pixelFormat = kCVPixelFormatType_32BGRA

        // No scaling for single window
        streamConfig.scalesToFit = false

        // Color space
        streamConfig.colorSpaceName = CGColorSpace.sRGB

        logger.info("Stream config - size: \(streamConfig.width)x\(streamConfig.height), fps: 30")

        // Create and start stream
        let stream = SCStream(filter: filter, configuration: streamConfig, delegate: self)
        captureStream = stream

        // Add output and start capture
        do {
            // Add output with dedicated queue for optimal performance
            try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: sampleHandlerQueue)

            // Log stream output configuration
            logger.info("Added stream output handler for type: .screen")

            try await stream.startCapture()

            isCapturing = true
            logger.info("✅ Successfully started window capture")

            // Start WebRTC if enabled
            if useWebRTC {
                logger.info("🌐 Starting WebRTC capture...")
                await startWebRTCCapture(use8k: use8k)
            } else {
                logger.info("🖼️ Using JPEG mode (WebRTC disabled)")
            }
        } catch {
            logger.error("Failed to start capture: \(error)")
            captureStream = nil
            throw ScreencapError.failedToStartCapture(error)
        }
    }

    /// Creates the WebRTC manager for the current capture mode and starts streaming.
    ///
    /// The server URL comes from the `VIBETUNNEL_SERVER_URL` environment variable, or
    /// `http://localhost:<serverPort>` (user default `serverPort`, "4020" when unset).
    /// Any failure, including an unparsable server URL, disables WebRTC so the
    /// service falls back to JPEG mode; this method never throws.
    ///
    /// - Parameter use8k: Quality selector passed to the WebRTC manager before starting.
    private func startWebRTCCapture(use8k: Bool) async {
        logger.info("🌐 startWebRTCCapture called")

        // Get server URL from environment or use default
        let serverPort = UserDefaults.standard.string(forKey: "serverPort") ?? "4020"
        let serverURLString = ProcessInfo.processInfo
            .environment["VIBETUNNEL_SERVER_URL"] ?? "http://localhost:\(serverPort)"
        guard let serverURL = URL(string: serverURLString) else {
            logger.error("Invalid server URL: \(serverURLString)")
            logger.error("🔄 Falling back to JPEG mode")
            // Without a manager WebRTC cannot deliver frames, so fall back exactly
            // like the failure path below instead of leaving the flag set.
            self.useWebRTC = false
            return
        }

        // Check if authentication is disabled
        let authMode = UserDefaults.standard.string(forKey: "authenticationMode") ?? "os"
        let isNoAuth = authMode == "none"

        // Create WebRTC manager with appropriate auth token
        let localAuthToken = isNoAuth ? nil : ServerManager.shared.bunServer?.localToken
        let manager = WebRTCManager(serverURL: serverURL, screencapService: self, localAuthToken: localAuthToken)
        webRTCManager = manager

        // Set quality before starting
        manager.setQuality(use8k: use8k)

        // Mode label sent to the WebRTC manager
        let modeString: String = switch captureMode {
        case .desktop(let index):
            "desktop-\(index)"
        case .allDisplays:
            "all-displays"
        case .window:
            "window"
        case .application:
            "application"
        }

        do {
            logger.info("🚀 Calling WebRTC manager startCapture with mode: \(modeString)")
            try await manager.startCapture(mode: modeString)

            logger.info("✅ WebRTC capture started successfully")
        } catch {
            logger.error("❌ Failed to start WebRTC capture: \(error)")
            logger.error("🔄 Falling back to JPEG mode")
            // Continue with JPEG mode
            self.useWebRTC = false
        }
    }

    /// Stop current capture
    ///
    /// Does nothing when `isCapturing` is already false. Otherwise it clears the
    /// capture state first, waits briefly, then stops the WebRTC manager (if any) and
    /// the capture stream, and finally reports `.captureStopped` to the state machine.
    /// A failure to stop the stream is logged, not thrown.
    func stopCapture() async {
        guard isCapturing else { return }

        // Transition to stopping state
        // (only sent when the state machine is currently in `.capturing`)
        if stateMachine.currentState == .capturing {
            stateMachine.processEvent(.stopCapture)
        }

        // Mark as not capturing first to stop frame processing
        isCapturing = false

        // Store references before clearing
        // (local copies keep the objects reachable so they can still be stopped below)
        let stream = captureStream
        let webRTC = webRTCManager

        // Clear references
        captureStream = nil
        currentFrame = nil
        webRTCManager = nil
        frameCounter = 0

        // Wait a bit for any in-flight frames to complete
        // (`try?` discards the error thrown if the sleep is interrupted)
        try? await Task.sleep(nanoseconds: 100_000_000) // 100ms

        // Stop WebRTC if active
        if let webRTC {
            await webRTC.stopCapture()
        }

        // Stop the stream
        if let stream {
            do {
                try await stream.stopCapture()
                logger.info("Stopped capture")
            } catch {
                logger.error("Failed to stop capture: \(error)")
            }
        }

        // Transition to stopped state
        // (sent even when stopping the stream failed above)
        stateMachine.processEvent(.captureStopped)
    }

    /// Encodes the most recently stored frame as JPEG (sRGB, quality 0.8).
    ///
    /// - Returns: The JPEG bytes, or `nil` when no capture is running, no frame has
    ///   been stored yet, or the encoding fails.
    func getCurrentFrame() -> Data? {
        logger.info("🖼️ getCurrentFrame() called")

        if !isCapturing {
            logger.warning("⚠️ Not capturing, cannot get frame")
            return nil
        }

        guard let latestFrame = currentFrame else {
            logger.warning("⚠️ currentFrame is nil, no frame available to send")
            return nil
        }

        logger.info("✅ Frame is available, preparing JPEG data...")
        let sourceImage = CIImage(cgImage: latestFrame)
        let renderer = CIContext()

        // Convert to JPEG with good quality
        let encodingOptions: [CIImageRepresentationOption: Any] = [
            kCGImageDestinationLossyCompressionQuality as CIImageRepresentationOption: 0.8
        ]
        guard let sRGB = CGColorSpace(name: CGColorSpace.sRGB),
              let encoded = renderer.jpegRepresentation(of: sourceImage, colorSpace: sRGB, options: encodingOptions)
        else {
            logger.error("Failed to convert frame to JPEG")
            return nil
        }

        logger.info("✅ JPEG data created successfully (\(encoded.count) bytes)")
        return encoded
    }

    /// Snapshot of the capture state machine.
    ///
    /// - Returns: The current state's raw value and the state machine's description of it.
    func getCaptureState() -> (state: String, description: String) {
        let stateName = stateMachine.currentState.rawValue
        let stateSummary = stateMachine.stateDescription()
        return (state: stateName, description: stateSummary)
    }

    /// Send click at specified coordinates
    ///
    /// Posts a left mouse-down followed 50 ms later by a left mouse-up at the location
    /// the normalized coordinates map to for the current capture mode.
    ///
    /// - Parameters:
    ///   - x: X coordinate in 0-1000 normalized range
    ///   - y: Y coordinate in 0-1000 normalized range
    ///   - cgWindowID: Optional window ID for window-specific clicks (currently only logged)
    /// - Throws: `ScreencapError.invalidCoordinates` for out-of-range input, `.notCapturing`
    ///   when no capture filter is set, `.noDisplay` when the target display is missing,
    ///   `.failedToCreateEvent` when an event cannot be created, or the cancellation
    ///   error if the task is cancelled during the delay (the button is still released).
    func sendClick(x: Double, y: Double, cgWindowID: Int? = nil) async throws {
        // Validate coordinate boundaries
        guard x >= 0 && x <= 1_000 && y >= 0 && y <= 1_000 else {
            logger.error("⚠️ Invalid click coordinates: (\(x), \(y)) - must be in range 0-1000")
            throw ScreencapError.invalidCoordinates(x: x, y: y)
        }

        // Security audit log - include timestamp for tracking
        let timestamp = Date().timeIntervalSince1970
        logger
            .info(
                "🔒 [AUDIT] Click event at \(timestamp): coords=(\(x), \(y)), windowID=\(cgWindowID?.description ?? "nil")"
            )

        logger.info("🖱️ Received click at normalized coordinates: (\(x), \(y))")

        // Get the capture filter to determine actual dimensions
        guard let filter = captureFilter else {
            throw ScreencapError.notCapturing
        }

        // Convert from 0-1000 normalized coordinates to actual pixel coordinates
        let normalizedX = x / 1_000.0
        let normalizedY = y / 1_000.0

        let pixelX: Double
        let pixelY: Double

        // Calculate pixel coordinates based on capture mode
        switch captureMode {
        case .desktop(let displayIndex):
            // Get SCShareableContent to ensure consistency
            let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)

            guard content.displays.indices.contains(displayIndex) else {
                throw ScreencapError.noDisplay
            }
            let display = content.displays[displayIndex]
            // Convert normalized to pixel coordinates within the display
            pixelX = display.frame.origin.x + (normalizedX * display.frame.width)
            pixelY = display.frame.origin.y + (normalizedY * display.frame.height)

            logger
                .info(
                    "📺 Display \(displayIndex): pixel coords=(\(String(format: "%.1f", pixelX)), \(String(format: "%.1f", pixelY)))"
                )

        case .allDisplays:
            // For all displays, we need to calculate based on the combined bounds
            let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)

            // With no displays the bounding rectangle is undefined; fail instead of
            // producing a click at a meaningless location.
            guard !content.displays.isEmpty else {
                throw ScreencapError.noDisplay
            }

            // Calculate the bounding rectangle
            let bounds = content.displays.reduce(CGRect.null) { $0.union($1.frame) }

            // Convert normalized to pixel coordinates within the combined bounds
            pixelX = bounds.minX + (normalizedX * bounds.width)
            pixelY = bounds.minY + (normalizedY * bounds.height)

            logger
                .info(
                    "🖥️ All displays: pixel coords=(\(String(format: "%.1f", pixelX)), \(String(format: "%.1f", pixelY)))"
                )

        case .window(let window):
            // For window capture, use the window's frame
            pixelX = window.frame.origin.x + (normalizedX * window.frame.width)
            pixelY = window.frame.origin.y + (normalizedY * window.frame.height)

            logger.info("🪟 Window: pixel coords=(\(String(format: "%.1f", pixelX)), \(String(format: "%.1f", pixelY)))")

        case .application:
            // For application capture, use the filter's content rect
            pixelX = filter.contentRect.origin.x + (normalizedX * filter.contentRect.width)
            pixelY = filter.contentRect.origin.y + (normalizedY * filter.contentRect.height)
        }

        // CGEvent uses screen coordinates which have top-left origin, same as our pixel coordinates
        let clickLocation = CGPoint(x: pixelX, y: pixelY)

        logger
            .info(
                "🎯 Final click location: (\(String(format: "%.1f", clickLocation.x)), \(String(format: "%.1f", clickLocation.y)))"
            )

        // Create both events up front so a creation failure cannot leave a
        // mouse-down without its matching mouse-up.
        guard let mouseDown = CGEvent(
            mouseEventSource: nil,
            mouseType: .leftMouseDown,
            mouseCursorPosition: clickLocation,
            mouseButton: .left
        ) else {
            throw ScreencapError.failedToCreateEvent
        }

        guard let mouseUp = CGEvent(
            mouseEventSource: nil,
            mouseType: .leftMouseUp,
            mouseCursorPosition: clickLocation,
            mouseButton: .left
        ) else {
            throw ScreencapError.failedToCreateEvent
        }

        // Post events
        mouseDown.post(tap: .cghidEventTap)
        do {
            try await Task.sleep(nanoseconds: 50_000_000) // 50ms delay
        } catch {
            // The task was cancelled mid-click: still release the button so the system
            // is not left with a pressed left mouse button, then propagate the error.
            mouseUp.post(tap: .cghidEventTap)
            throw error
        }
        mouseUp.post(tap: .cghidEventTap)

        logger.info("✅ Click sent successfully")
    }

    /// Posts a left mouse-down event at the given normalized position.
    /// - Parameters:
    ///   - x: X coordinate in 0-1000 normalized range
    ///   - y: Y coordinate in 0-1000 normalized range
    /// - Throws: `ScreencapError.invalidCoordinates` for out-of-range input,
    ///   `.failedToCreateEvent` when the event cannot be created, or whatever
    ///   `calculateClickLocation(x:y:)` throws.
    func sendMouseDown(x: Double, y: Double) async throws {
        // Reject anything outside the normalized 0-1000 square
        let normalizedRange = 0.0...1_000.0
        guard normalizedRange.contains(x), normalizedRange.contains(y) else {
            logger.error("⚠️ Invalid mouse down coordinates: (\(x), \(y)) - must be in range 0-1000")
            throw ScreencapError.invalidCoordinates(x: x, y: y)
        }

        // Security audit log
        let auditTime = Date().timeIntervalSince1970
        logger.info("🔒 [AUDIT] Mouse down event at \(auditTime): coords=(\(x), \(y))")

        logger.info("🖱️ Received mouse down at normalized coordinates: (\(x), \(y))")

        // Map the normalized position to a screen location
        let pressLocation = try await calculateClickLocation(x: x, y: y)

        let pressEvent = CGEvent(
            mouseEventSource: nil,
            mouseType: .leftMouseDown,
            mouseCursorPosition: pressLocation,
            mouseButton: .left
        )
        guard let pressEvent else {
            throw ScreencapError.failedToCreateEvent
        }

        pressEvent.post(tap: .cghidEventTap)

        logger.info("✅ Mouse down sent successfully")
    }

    /// Posts a left-button drag event at the given normalized position.
    /// - Parameters:
    ///   - x: X coordinate in 0-1000 normalized range
    ///   - y: Y coordinate in 0-1000 normalized range
    /// - Throws: `ScreencapError.invalidCoordinates` for out-of-range input,
    ///   `.failedToCreateEvent` when the event cannot be created, or whatever
    ///   `calculateClickLocation(x:y:)` throws.
    func sendMouseMove(x: Double, y: Double) async throws {
        // Reject anything outside the normalized 0-1000 square
        let normalizedRange = 0.0...1_000.0
        guard normalizedRange.contains(x), normalizedRange.contains(y) else {
            logger.error("⚠️ Invalid mouse move coordinates: (\(x), \(y)) - must be in range 0-1000")
            throw ScreencapError.invalidCoordinates(x: x, y: y)
        }

        // Map the normalized position to a screen location
        let dragLocation = try await calculateClickLocation(x: x, y: y)

        let dragEvent = CGEvent(
            mouseEventSource: nil,
            mouseType: .leftMouseDragged,
            mouseCursorPosition: dragLocation,
            mouseButton: .left
        )
        guard let dragEvent else {
            throw ScreencapError.failedToCreateEvent
        }

        dragEvent.post(tap: .cghidEventTap)
    }

    /// Posts a left mouse-up event at the given normalized position.
    /// - Parameters:
    ///   - x: X coordinate in 0-1000 normalized range
    ///   - y: Y coordinate in 0-1000 normalized range
    /// - Throws: `ScreencapError.invalidCoordinates` for out-of-range input,
    ///   `.failedToCreateEvent` when the event cannot be created, or whatever
    ///   `calculateClickLocation(x:y:)` throws.
    func sendMouseUp(x: Double, y: Double) async throws {
        // Reject anything outside the normalized 0-1000 square
        let normalizedRange = 0.0...1_000.0
        guard normalizedRange.contains(x), normalizedRange.contains(y) else {
            logger.error("⚠️ Invalid mouse up coordinates: (\(x), \(y)) - must be in range 0-1000")
            throw ScreencapError.invalidCoordinates(x: x, y: y)
        }

        // Security audit log
        let auditTime = Date().timeIntervalSince1970
        logger.info("🔒 [AUDIT] Mouse up event at \(auditTime): coords=(\(x), \(y))")

        logger.info("🖱️ Received mouse up at normalized coordinates: (\(x), \(y))")

        // Map the normalized position to a screen location
        let releaseLocation = try await calculateClickLocation(x: x, y: y)

        let releaseEvent = CGEvent(
            mouseEventSource: nil,
            mouseType: .leftMouseUp,
            mouseCursorPosition: releaseLocation,
            mouseButton: .left
        )
        guard let releaseEvent else {
            throw ScreencapError.failedToCreateEvent
        }

        releaseEvent.post(tap: .cghidEventTap)

        logger.info("✅ Mouse up sent successfully")
    }

    /// Calculate pixel location from normalized coordinates
    private func calculateClickLocation(x: Double, y: Double) async throws -> CGPoint {
        // Inputs are 0-1000 normalized coordinates relative to the captured area.
        // Throws `notCapturing` when no capture filter is set, `noDisplay` when the
        // required display(s) cannot be found, and rethrows any error from
        // SCShareableContent.

        // Get the capture filter to determine actual dimensions
        guard let filter = captureFilter else {
            throw ScreencapError.notCapturing
        }

        // Scale the 0-1000 inputs down to 0-1 fractions of the target area
        let fractionX = CGFloat(x / 1_000.0)
        let fractionY = CGFloat(y / 1_000.0)

        // Resolve the rectangle the fractions are relative to, based on capture mode
        let targetRect: CGRect
        switch captureMode {
        case .desktop(let displayIndex):
            // Query the current shareable content so the display list is up to date
            let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)
            guard content.displays.indices.contains(displayIndex) else {
                throw ScreencapError.noDisplay
            }
            targetRect = content.displays[displayIndex].frame

        case .allDisplays:
            // The target is the bounding rectangle of every display frame
            let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: false)
            let frames = content.displays.map(\.frame)

            // With no displays there is no bounding rectangle; fail like the single-display
            // case instead of producing an infinite/NaN point.
            guard let minX = frames.map(\.minX).min(),
                  let minY = frames.map(\.minY).min(),
                  let maxX = frames.map(\.maxX).max(),
                  let maxY = frames.map(\.maxY).max()
            else {
                throw ScreencapError.noDisplay
            }
            targetRect = CGRect(x: minX, y: minY, width: maxX - minX, height: maxY - minY)

        case .window(let window):
            // For window capture, use the window's frame
            targetRect = window.frame

        case .application:
            // For application capture, use the filter's content rect
            targetRect = filter.contentRect
        }

        // CGEvent uses screen coordinates which have top-left origin, same as the frames used above
        return CGPoint(
            x: targetRect.origin.x + fractionX * targetRect.width,
            y: targetRect.origin.y + fractionY * targetRect.height
        )
    }

    /// Send keyboard input
    func sendKey(
        key: String,
        metaKey: Bool = false,
        ctrlKey: Bool = false,
        altKey: Bool = false,
        shiftKey: Bool = false
    )
        async throws
    {
        // Posts a key-down, waits 50ms, then posts the matching key-up for `key`,
        // with the requested modifier flags applied to both events.
        // Throws `invalidKeyInput` for an empty or over-long key, `failedToCreateEvent`
        // when an event cannot be built, and rethrows an error from the delay
        // (e.g. cancellation) after the key has been released.

        // Validate key input
        guard !key.isEmpty && key.count <= 20 else {
            logger.error("⚠️ Invalid key input: '\(key)' - must be non-empty and <= 20 characters")
            throw ScreencapError.invalidKeyInput(key)
        }

        // Security audit log - include timestamp for tracking
        let timestamp = Date().timeIntervalSince1970
        logger
            .info(
                "🔒 [AUDIT] Key event at \(timestamp): key='\(key)', modifiers=[cmd:\(metaKey), ctrl:\(ctrlKey), alt:\(altKey), shift:\(shiftKey)]"
            )

        // Convert key string to key code
        let keyCode = keyStringToKeyCode(key)

        // Build both events up front so a failure cannot leave a press without a release
        guard let keyDown = CGEvent(keyboardEventSource: nil, virtualKey: keyCode, keyDown: true),
              let keyUp = CGEvent(keyboardEventSource: nil, virtualKey: keyCode, keyDown: false)
        else {
            throw ScreencapError.failedToCreateEvent
        }

        // Set modifier flags
        var flags: CGEventFlags = []
        if metaKey { flags.insert(.maskCommand) }
        if ctrlKey { flags.insert(.maskControl) }
        if altKey { flags.insert(.maskAlternate) }
        if shiftKey { flags.insert(.maskShift) }

        keyDown.flags = flags
        keyUp.flags = flags

        // Post the press, hold briefly, then post the release
        keyDown.post(tap: .cghidEventTap)
        do {
            // Always post the key-up, even if the sleep throws (e.g. the task is cancelled);
            // otherwise the key-down above would never be followed by a release.
            defer { keyUp.post(tap: .cghidEventTap) }
            try await Task.sleep(nanoseconds: 50_000_000) // 50ms delay
        }

        logger.info("Sent key: \(key) with modifiers")
    }

    // MARK: - State Machine Setup

    /// Configure state machine callbacks
    private func setupStateMachine() {
        stateMachine.onStateChange = { [weak self] newState, previousState in
            guard let self else { return }

            // Log every transition, spelling out a missing previous state as "nil"
            let previousDescription = previousState?.description ?? "nil"
            self.logger.info("📊 State changed: \(previousDescription) → \(newState)")

            // Without a WebRTC manager there is nobody to forward the change to
            guard let manager = self.webRTCManager else { return }

            // Forward the transition over the signalling channel without blocking the callback
            Task {
                await manager.sendSignalMessage([
                    "type": "state-change",
                    "state": newState.rawValue,
                    "previousState": previousState?.rawValue as Any
                ])
            }
        }
    }

    // MARK: - Display Monitoring

    /// Set up notifications for display configuration changes
    private func setupDisplayNotifications() {
        // Register for the AppKit notification posted when screen parameters change
        let center = NotificationCenter.default
        let screenParametersChanged = NSApplication.didChangeScreenParametersNotification
        center.addObserver(
            self,
            selector: #selector(displayConfigurationChanged),
            name: screenParametersChanged,
            object: nil
        )

        logger.info("📺 Display monitoring enabled")
    }

    /// Handle display configuration changes
    @objc
    private func displayConfigurationChanged(_ notification: Notification) {
        logger.warning("⚠️ Display configuration changed")

        // Only an active capture needs to react to the change
        if !isCapturing {
            logger.info("Not capturing, ignoring display change")
            return
        }

        // The recovery path is async, so run it in a main-actor task
        Task { @MainActor in
            await self.handleDisplayChange()
        }
    }

    /// Handle display disconnection or reconfiguration during capture
    private func handleDisplayChange() async {
        logger.info("🔄 Handling display configuration change during capture")

        // Report the display change to the capture state machine
        stateMachine.processEvent(.displayChanged)

        // Snapshot everything needed for the restart *before* stopping the capture, so the
        // restart uses the configuration that was active when the change arrived.
        // NOTE(review): stopCapture() is not visible here; presumably it may reset some of
        // this state, which is why the values are captured up front — confirm.
        let captureMode = self.captureMode
        let windowToRestore = selectedWindow
        let restartWithWebRTC = useWebRTC

        // Stop current capture
        await stopCapture()

        // Give the system a moment to stabilize; an interrupted sleep is deliberately ignored
        try? await Task.sleep(nanoseconds: 500_000_000) // 0.5 seconds

        do {
            // Check which displays are still available
            let displays = try await getDisplays()

            switch captureMode {
            case .desktop(let displayIndex):
                if displayIndex < displays.count {
                    // Same display index is still valid: restart on it
                    logger.info("✅ Display \(displayIndex) still available, restarting capture")
                    try await startCapture(type: "display", index: displayIndex, useWebRTC: restartWithWebRTC)
                } else if !displays.isEmpty {
                    // Fall back to the first display
                    logger.warning("⚠️ Display \(displayIndex) no longer available, falling back to primary display")
                    try await startCapture(type: "display", index: 0, useWebRTC: restartWithWebRTC)
                } else {
                    logger.error("❌ No displays available after configuration change")
                    // Notify connected clients
                    await notifyDisplayDisconnected()
                }

            case .window:
                if let window = windowToRestore {
                    do {
                        // Verify the window still exists before restarting on it
                        let content = try await SCShareableContent.current
                        if content.windows.contains(where: { $0.windowID == window.windowID }) {
                            logger.info("✅ Window still available, restarting capture")
                            try await startCaptureWindow(
                                cgWindowID: Int(window.windowID),
                                useWebRTC: restartWithWebRTC
                            )
                        } else {
                            logger.warning("⚠️ Window no longer available after display change")
                            await notifyWindowDisconnected()
                        }
                    } catch {
                        logger.error("Failed to verify window availability: \(error)")
                        await notifyWindowDisconnected()
                    }
                } else {
                    // Capture has already been stopped above; with no window to restore,
                    // tell clients instead of silently leaving them without a stream.
                    logger.warning("⚠️ No selected window to restore after display change")
                    await notifyWindowDisconnected()
                }

            case .allDisplays:
                // For all displays mode, just restart
                logger.info("🔄 Restarting all displays capture after configuration change")
                try await startCapture(type: "display", index: -1, useWebRTC: restartWithWebRTC)

            case .application:
                // Application capture is not restarted yet; clients are only notified
                logger.info("🔄 Application capture mode - checking if still available")
                await notifyDisplayDisconnected()
            }
        } catch {
            logger.error("❌ Failed to handle display change: \(error)")
            await notifyDisplayDisconnected()
        }
    }

    /// Notify connected clients that display was disconnected
    private func notifyDisplayDisconnected() async {
        // Without a WebRTC manager there is no signalling channel to use
        guard let manager = webRTCManager else { return }

        await manager.sendSignalMessage([
            "type": "display-disconnected",
            "message": "Display disconnected during capture"
        ])
    }

    /// Notify connected clients that window was disconnected
    private func notifyWindowDisconnected() async {
        // Without a WebRTC manager there is no signalling channel to use
        guard let manager = webRTCManager else { return }

        await manager.sendSignalMessage([
            "type": "window-disconnected",
            "message": "Window closed or became unavailable"
        ])
    }

    // MARK: - Private Methods

    /// Translate a key name into a macOS virtual key code.
    ///
    /// Matching is case-insensitive. Besides single characters, the table accepts the
    /// named keys listed below (e.g. "Enter", "ArrowLeft", "F5", "PageDown", "Shift").
    /// Character key codes are the ANSI (US layout) virtual key codes.
    ///
    /// - Parameter key: Key name or single character.
    /// - Returns: The matching virtual key code, or `0x00` (the "a" key) when the name is
    ///   not in the table.
    private func keyStringToKeyCode(_ key: String) -> CGKeyCode {
        switch key.lowercased() {
        // Letters, digits and punctuation
        case "a": 0x00
        case "s": 0x01
        case "d": 0x02
        case "f": 0x03
        case "h": 0x04
        case "g": 0x05
        case "z": 0x06
        case "x": 0x07
        case "c": 0x08
        case "v": 0x09
        case "b": 0x0B
        case "q": 0x0C
        case "w": 0x0D
        case "e": 0x0E
        case "r": 0x0F
        case "y": 0x10
        case "t": 0x11
        case "1": 0x12
        case "2": 0x13
        case "3": 0x14
        case "4": 0x15
        case "6": 0x16
        case "5": 0x17
        case "=": 0x18
        case "9": 0x19
        case "7": 0x1A
        case "-": 0x1B
        case "8": 0x1C
        case "0": 0x1D
        case "]": 0x1E
        case "o": 0x1F
        case "u": 0x20
        case "[": 0x21
        case "i": 0x22
        case "p": 0x23
        case "l": 0x25
        case "j": 0x26
        case "'": 0x27
        case "k": 0x28
        case ";": 0x29
        case "\\": 0x2A
        case ",": 0x2B
        case "/": 0x2C
        case "n": 0x2D
        case "m": 0x2E
        case ".": 0x2F
        case "`": 0x32

        // Whitespace and editing keys
        case " ", "space": 0x31
        case "enter", "return": 0x24
        case "tab": 0x30
        case "escape", "esc": 0x35
        case "backspace", "delete": 0x33
        case "forwarddelete": 0x75

        // Navigation keys
        case "arrowup", "up": 0x7E
        case "arrowdown", "down": 0x7D
        case "arrowleft", "left": 0x7B
        case "arrowright", "right": 0x7C
        case "home": 0x73
        case "end": 0x77
        case "pageup": 0x74
        case "pagedown": 0x79

        // Function keys
        case "f1": 0x7A
        case "f2": 0x78
        case "f3": 0x63
        case "f4": 0x76
        case "f5": 0x60
        case "f6": 0x61
        case "f7": 0x62
        case "f8": 0x64
        case "f9": 0x65
        case "f10": 0x6D
        case "f11": 0x67
        case "f12": 0x6F

        // Modifier keys pressed on their own
        case "meta", "command", "cmd": 0x37
        case "shift": 0x38
        case "alt", "option": 0x3A
        case "control": 0x3B

        default: 0x00 // Unknown names keep the historical fallback to 'a'
        }
    }
}

// MARK: - SCStreamDelegate

extension ScreencapService: SCStreamDelegate {
    /// Delegate callback invoked when the capture stream stops with an error.
    ///
    /// The callback is nonisolated, so the actual handling is dispatched to the
    /// service's isolated `handleStreamError(_:)` from a task.
    public nonisolated func stream(_ stream: SCStream, didStopWithError error: Error) {
        Task { [weak self] in
            guard let self else { return }
            await self.handleStreamError(error)
        }
    }

    /// Log the stream failure, then drop the stream reference and clear the capturing flag.
    private func handleStreamError(_ error: Error) {
        logger.error("Stream stopped with error: \(error)")
        captureStream = nil
        isCapturing = false
    }
}

// MARK: - SCStreamOutput

extension ScreencapService: SCStreamOutput {
    /// Core Image context shared by all frame renders.
    ///
    /// Creating a `CIContext` is expensive, so one instance is reused instead of
    /// building a new context for every captured frame.
    private static let frameRenderingContext = CIContext()

    /// Stream output callback: receives each sample buffer produced by the capture stream.
    ///
    /// Non-screen samples, buffers whose data is not ready, buffers without frame
    /// attachments, frames whose status is present and not `.complete`, and buffers
    /// without an image buffer are dropped. Accepted frames are forwarded to the WebRTC
    /// manager when WebRTC is enabled, and are always handed to `processFrame(ciImage:)`
    /// on the main actor.
    public nonisolated func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of type: SCStreamOutputType
    ) {
        // Only screen samples are handled
        guard type == .screen else {
            return
        }

        // Sample roughly one frame in 300 for diagnostic logging;
        // random sampling avoids shared mutable state in this nonisolated callback
        let shouldLog = Int.random(in: 0..<300) == 0

        // Log sample buffer format details for sampled frames only
        if shouldLog, let formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer) {
            let mediaSubType = CMFormatDescriptionGetMediaSubType(formatDesc)
            let dimensions = CMVideoFormatDescriptionGetDimensions(formatDesc)

            Task { @MainActor in
                self.logger.info("📊 Frame received - dimensions: \(dimensions.width)x\(dimensions.height)")
                self.logger.info("🎨 Pixel format: \(String(format: "0x%08X", mediaSubType))")
                // frameCounter is advanced only in processFrame(ciImage:); it is read here
                // but not incremented, so sampled frames are not counted twice.
                if self.frameCounter == 0 {
                    self.logger.info("🎬 FIRST FRAME RECEIVED!")
                }
            }
        }

        // Drop buffers whose data is not ready yet
        guard CMSampleBufferDataIsReady(sampleBuffer) else {
            return
        }

        // Get sample buffer attachments to check frame status
        guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(
            sampleBuffer,
            createIfNecessary: false
        ) as? [[SCStreamFrameInfo: Any]],
            let attachments = attachmentsArray.first
        else {
            return
        }

        // Only process complete frames. A missing or unrecognised status is let through;
        // only an explicit non-complete status drops the frame.
        if let statusRawValue = attachments[SCStreamFrameInfo.status] as? Int,
           let status = SCFrameStatus(rawValue: statusRawValue),
           status != .complete
        {
            return
        }

        // Get pixel buffer immediately
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }

        // Forward the sample buffer to WebRTC when enabled
        if useWebRTC, let webRTCManager {
            // Called synchronously from this callback rather than from a Task
            webRTCManager.processVideoFrameSync(sampleBuffer)

            // Log occasionally
            if shouldLog {
                Task { @MainActor in
                    self.logger.info("🌐 Forwarding frame to WebRTC manager")
                }
            }
        } else if shouldLog {
            Task { @MainActor in
                self.logger.info("🖼️ WebRTC disabled - using JPEG mode")
            }
        }

        // Only create and process an image for a pixel buffer with non-zero dimensions
        guard CVPixelBufferGetWidth(pixelBuffer) > 0 && CVPixelBufferGetHeight(pixelBuffer) > 0 else {
            return
        }

        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
        Task { @MainActor [weak self] in
            guard let self else { return }
            await self.processFrame(ciImage: ciImage)
        }
    }

    /// Render a captured frame to a `CGImage` and store it as the current frame.
    ///
    /// Frames are skipped when capture has stopped, when the image extent is empty, or
    /// when rendering fails. Each stored frame advances `frameCounter`.
    ///
    /// - Parameter ciImage: The frame wrapped from the stream's pixel buffer.
    @MainActor
    private func processFrame(ciImage: CIImage) async {
        // Check if we're still capturing before processing
        guard isCapturing else {
            logger.debug("Skipping frame processing - capture stopped")
            return
        }

        // Check extent is valid
        guard !ciImage.extent.isEmpty else {
            logger.error("CIImage has empty extent, skipping frame")
            return
        }

        // Render with the shared context rather than creating one per frame
        guard let cgImage = Self.frameRenderingContext.createCGImage(ciImage, from: ciImage.extent) else {
            logger.error("Failed to create CGImage from CIImage")
            return
        }

        // Check again if we're still capturing before updating frame
        guard isCapturing else {
            logger.debug("Capture stopped during frame processing")
            return
        }

        // Update current frame
        currentFrame = cgImage
        let frameCount = frameCounter
        frameCounter += 1

        // Log only every 300 frames (10 seconds at 30fps) to reduce noise
        if frameCount.isMultiple(of: 300) {
            logger.info("📹 Frame \(frameCount) received")
        }
    }
}

// MARK: - Error Types

/// Errors reported by the screen capture service, each with a human-readable description.
enum ScreencapError: LocalizedError {
    // Capture target selection
    case noDisplay
    case invalidWindowIndex
    case invalidApplicationIndex
    case invalidCaptureType

    // Input events and capture lifecycle
    case failedToCreateEvent
    case notCapturing
    case failedToGetContent(Error)
    case invalidConfiguration
    case failedToStartCapture(Error)

    // Request validation and readiness
    case invalidCoordinates(x: Double, y: Double)
    case invalidKeyInput(String)
    case serviceNotReady

    /// Message describing the error; cases that wrap an underlying error append its
    /// localized description.
    var errorDescription: String? {
        switch self {
        case .noDisplay:
            return "No display available"
        case .invalidWindowIndex:
            return "Invalid window index"
        case .invalidApplicationIndex:
            return "Invalid application index"
        case .invalidCaptureType:
            return "Invalid capture type"
        case .failedToCreateEvent:
            return "Failed to create input event"
        case .notCapturing:
            return "Not currently capturing"
        case let .failedToGetContent(underlying):
            return "Failed to get screen content: \(underlying.localizedDescription)"
        case .invalidConfiguration:
            return "Invalid capture configuration"
        case let .failedToStartCapture(underlying):
            return "Failed to start capture: \(underlying.localizedDescription)"
        case let .invalidCoordinates(x, y):
            return "Invalid coordinates (\(x), \(y)) - must be in range 0-1000"
        case let .invalidKeyInput(key):
            return "Invalid key input: '\(key)' - must be non-empty and <= 20 characters"
        case .serviceNotReady:
            return "Screen capture service is not ready. Connection may still be initializing."
        }
    }
}