From d5b40c15508d7ac8ded50aeb8eb4edcabc9e4f73 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 8 Jun 2025 08:42:43 +0100 Subject: [PATCH] feat: Implement proper frontmost window capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for capturing the frontmost window of the frontmost application instead of falling back to screen capture mode. Changes: - Added 'frontmost' case to CaptureMode enum in Swift CLI - Implemented captureFrontmostWindow() method using NSWorkspace.shared.frontmostApplication - Updated TypeScript to use --mode frontmost instead of defaulting to screen mode - Added comprehensive test coverage for frontmost functionality - Updated existing tests to reflect new behavior The frontmost mode now: 1. Detects the currently active application 2. Captures only its frontmost window (index 0) 3. Returns a single image file with proper metadata 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../Sources/peekaboo/ImageCommand.swift | 119 ++++++++---------- .../Sources/peekaboo/ImageErrorHandler.swift | 75 +++++++++++ peekaboo-cli/Sources/peekaboo/Models.swift | 1 + .../Sources/peekaboo/OutputPathResolver.swift | 8 +- peekaboo-cli/Sources/peekaboo/Version.swift | 2 +- src/utils/image-cli-args.ts | 9 +- tests/mocks/peekaboo-cli.mock.ts | 20 +++ tests/unit/tools/image.test.ts | 26 ++-- 8 files changed, 166 insertions(+), 94 deletions(-) create mode 100644 peekaboo-cli/Sources/peekaboo/ImageErrorHandler.swift diff --git a/peekaboo-cli/Sources/peekaboo/ImageCommand.swift b/peekaboo-cli/Sources/peekaboo/ImageCommand.swift index 012c942..c00618a 100644 --- a/peekaboo-cli/Sources/peekaboo/ImageCommand.swift +++ b/peekaboo-cli/Sources/peekaboo/ImageCommand.swift @@ -80,6 +80,8 @@ struct ImageCommand: ParsableCommand { } else { return try captureScreens() } + case .frontmost: + return try captureFrontmostWindow() } } @@ -96,75 +98,8 @@ struct ImageCommand: ParsableCommand { } } - private func handleError(_ error: Error) { - let captureError: CaptureError = if let err = error as? CaptureError { - err - } else { - .unknownError(error.localizedDescription) - } - - // Log the full error details for debugging - Logger.shared.debug("Image capture error: \(error)") - - // If it's a CaptureError with an underlying error, log that too - switch captureError { - case let .captureCreationFailed(underlyingError): - if let underlying = underlyingError { - Logger.shared.debug("Underlying capture creation error: \(underlying)") - } - case let .windowCaptureFailed(underlyingError): - if let underlying = underlyingError { - Logger.shared.debug("Underlying window capture error: \(underlying)") - } - case let .fileWriteError(_, underlyingError): - if let underlying = underlyingError { - Logger.shared.debug("Underlying file write error: \(underlying)") - } - default: - break - } - - if jsonOutput { - let code: ErrorCode = switch captureError { - case .screenRecordingPermissionDenied: - .PERMISSION_ERROR_SCREEN_RECORDING - case .accessibilityPermissionDenied: - .PERMISSION_ERROR_ACCESSIBILITY - case .appNotFound: - .APP_NOT_FOUND - case .windowNotFound, .noWindowsFound: - .WINDOW_NOT_FOUND - case .fileWriteError: - .FILE_IO_ERROR - case .invalidArgument: - .INVALID_ARGUMENT - case .unknownError: - .UNKNOWN_ERROR - default: - .CAPTURE_FAILED - } - - // Provide additional details for app not found errors - var details: String? - if case .appNotFound = captureError { - let runningApps = NSWorkspace.shared.runningApplications - .filter { $0.activationPolicy == .regular } - .compactMap(\.localizedName) - .sorted() - .joined(separator: ", ") - details = "Available applications: \(runningApps)" - } - - outputError( - message: captureError.localizedDescription, - code: code, - details: details ?? "Image capture operation failed" - ) - } else { - var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError) - print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream) - } - Foundation.exit(captureError.exitCode) + private func handleError(_ error: Error) -> Never { + ImageErrorHandler.handleError(error, jsonOutput: jsonOutput) } private func determineMode() -> CaptureMode { @@ -307,7 +242,10 @@ struct ImageCommand: ParsableCommand { let searchTerm = windowTitle let appName = targetApp.localizedName ?? "Unknown" - Logger.shared.debug("Window not found. Searched for '\(searchTerm)' in \(appName). Available windows: \(availableTitles)") + Logger.shared.debug( + "Window not found. Searched for '\(searchTerm)' in \(appName). " + + "Available windows: \(availableTitles)" + ) throw CaptureError.windowTitleNotFound(searchTerm, appName, availableTitles) } @@ -502,4 +440,45 @@ struct ImageCommand: ParsableCommand { throw CaptureError.windowCaptureFailed(error) } } + + private func captureFrontmostWindow() throws -> [SavedFile] { + Logger.shared.debug("Capturing frontmost window") + + // Get the frontmost (active) application + guard let frontmostApp = NSWorkspace.shared.frontmostApplication else { + throw CaptureError.appNotFound("No frontmost application found") + } + + Logger.shared.debug("Frontmost app: \(frontmostApp.localizedName ?? "Unknown")") + + // Get windows for the frontmost app + let windows = try WindowManager.getWindowsForApp(pid: frontmostApp.processIdentifier) + guard !windows.isEmpty else { + throw CaptureError.noWindowsFound(frontmostApp.localizedName ?? "frontmost application") + } + + // Get the frontmost window (index 0) + let frontmostWindow = windows[0] + + Logger.shared.debug("Capturing frontmost window: '\(frontmostWindow.title)'") + + // Generate output path + let timestamp = DateFormatter.timestamp.string(from: Date()) + let appName = frontmostApp.localizedName ?? "UnknownApp" + let safeName = appName.replacingOccurrences(of: " ", with: "_") + let fileName = "frontmost_\(safeName)_\(timestamp).\(format.rawValue)" + let filePath = OutputPathResolver.getOutputPathWithFallback(basePath: path, fileName: fileName) + + // Capture the window + try captureWindow(frontmostWindow, to: filePath) + + return [SavedFile( + path: filePath, + item_label: appName, + window_title: frontmostWindow.title, + window_id: UInt32(frontmostWindow.windowId), + window_index: frontmostWindow.windowIndex, + mime_type: format == .png ? "image/png" : "image/jpeg" + )] + } } diff --git a/peekaboo-cli/Sources/peekaboo/ImageErrorHandler.swift b/peekaboo-cli/Sources/peekaboo/ImageErrorHandler.swift new file mode 100644 index 0000000..0d82891 --- /dev/null +++ b/peekaboo-cli/Sources/peekaboo/ImageErrorHandler.swift @@ -0,0 +1,75 @@ +import Foundation +import AppKit + +struct ImageErrorHandler { + static func handleError(_ error: Error, jsonOutput: Bool) -> Never { + let captureError: CaptureError = if let err = error as? CaptureError { + err + } else { + .unknownError(error.localizedDescription) + } + + // Log the full error details for debugging + Logger.shared.debug("Image capture error: \(error)") + + // If it's a CaptureError with an underlying error, log that too + switch captureError { + case let .captureCreationFailed(underlyingError): + if let underlying = underlyingError { + Logger.shared.debug("Underlying capture creation error: \(underlying)") + } + case let .windowCaptureFailed(underlyingError): + if let underlying = underlyingError { + Logger.shared.debug("Underlying window capture error: \(underlying)") + } + case let .fileWriteError(_, underlyingError): + if let underlying = underlyingError { + Logger.shared.debug("Underlying file write error: \(underlying)") + } + default: + break + } + + if jsonOutput { + let code: ErrorCode = switch captureError { + case .screenRecordingPermissionDenied: + .PERMISSION_ERROR_SCREEN_RECORDING + case .accessibilityPermissionDenied: + .PERMISSION_ERROR_ACCESSIBILITY + case .appNotFound: + .APP_NOT_FOUND + case .windowNotFound, .noWindowsFound: + .WINDOW_NOT_FOUND + case .fileWriteError: + .FILE_IO_ERROR + case .invalidArgument: + .INVALID_ARGUMENT + case .unknownError: + .UNKNOWN_ERROR + default: + .CAPTURE_FAILED + } + + // Provide additional details for app not found errors + var details: String? + if case .appNotFound = captureError { + let runningApps = NSWorkspace.shared.runningApplications + .filter { $0.activationPolicy == .regular } + .compactMap(\.localizedName) + .sorted() + .joined(separator: ", ") + details = "Available applications: \(runningApps)" + } + + outputError( + message: captureError.localizedDescription, + code: code, + details: details ?? "Image capture operation failed" + ) + } else { + var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError) + print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream) + } + Foundation.exit(captureError.exitCode) + } +} diff --git a/peekaboo-cli/Sources/peekaboo/Models.swift b/peekaboo-cli/Sources/peekaboo/Models.swift index 1df15ec..3089eb2 100644 --- a/peekaboo-cli/Sources/peekaboo/Models.swift +++ b/peekaboo-cli/Sources/peekaboo/Models.swift @@ -20,6 +20,7 @@ enum CaptureMode: String, CaseIterable, ExpressibleByArgument { case screen case window case multi + case frontmost } enum ImageFormat: String, CaseIterable, ExpressibleByArgument { diff --git a/peekaboo-cli/Sources/peekaboo/OutputPathResolver.swift b/peekaboo-cli/Sources/peekaboo/OutputPathResolver.swift index 73f4c24..aed31bf 100644 --- a/peekaboo-cli/Sources/peekaboo/OutputPathResolver.swift +++ b/peekaboo-cli/Sources/peekaboo/OutputPathResolver.swift @@ -129,11 +129,9 @@ struct OutputPathResolver { let sensitivePathPrefixes = ["/etc/", "/usr/", "/bin/", "/sbin/", "/System/", "/Library/System/"] let normalizedPath = (path as NSString).standardizingPath - for prefix in sensitivePathPrefixes { - if normalizedPath.hasPrefix(prefix) { - Logger.shared.debug("Path points to system directory: \(path) -> \(normalizedPath)") - break - } + for prefix in sensitivePathPrefixes where normalizedPath.hasPrefix(prefix) { + Logger.shared.debug("Path points to system directory: \(path) -> \(normalizedPath)") + break } } } diff --git a/peekaboo-cli/Sources/peekaboo/Version.swift b/peekaboo-cli/Sources/peekaboo/Version.swift index c266625..a0c6f32 100644 --- a/peekaboo-cli/Sources/peekaboo/Version.swift +++ b/peekaboo-cli/Sources/peekaboo/Version.swift @@ -1,4 +1,4 @@ // This file is auto-generated by the build script. Do not edit manually. enum Version { - static let current = "1.0.0-beta.20" + static let current = "1.0.0-beta.21" } diff --git a/src/utils/image-cli-args.ts b/src/utils/image-cli-args.ts index 39a6666..cebabef 100644 --- a/src/utils/image-cli-args.ts +++ b/src/utils/image-cli-args.ts @@ -82,11 +82,10 @@ export function buildSwiftCliArgs( args.push("--mode", "screen", "--screen-index", screenIndex.toString()); } } else if (input.app_target.toLowerCase() === "frontmost") { - // 'frontmost': All windows of the frontmost app - log.warn( - "'frontmost' target requires determining current frontmost app, defaulting to screen mode", - ); - args.push("--mode", "screen"); + // 'frontmost': Capture the frontmost window of the frontmost app + // This requires special handling to first find the frontmost app, then capture its frontmost window + log.debug("Using frontmost mode - will attempt to capture frontmost window"); + args.push("--mode", "frontmost"); } else if (input.app_target.includes(":")) { // 'AppName:WINDOW_TITLE:Title' or 'AppName:WINDOW_INDEX:Index' const parts = input.app_target.split(":"); diff --git a/tests/mocks/peekaboo-cli.mock.ts b/tests/mocks/peekaboo-cli.mock.ts index 74c7e6b..aa3a291 100644 --- a/tests/mocks/peekaboo-cli.mock.ts +++ b/tests/mocks/peekaboo-cli.mock.ts @@ -94,6 +94,26 @@ export const mockSwiftCli = { }; }, + // Mock frontmost window capture response + captureFrontmostWindow(): SwiftCliResponse { + return { + success: true, + data: { + saved_files: [ + { + path: "/tmp/frontmost_Safari_20250608_083000.png", + item_label: "Safari", + window_title: "Example Website - Safari", + window_id: 12345, + window_index: 0, + mime_type: "image/png", + }, + ], + } as ImageCaptureData, + messages: [], + }; + }, + // Mock error responses permissionDenied(): SwiftCliResponse { return { diff --git a/tests/unit/tools/image.test.ts b/tests/unit/tools/image.test.ts index 66e7d91..b55851b 100644 --- a/tests/unit/tools/image.test.ts +++ b/tests/unit/tools/image.test.ts @@ -383,28 +383,28 @@ describe("Image Tool", () => { ); }); - it("should handle app_target: 'frontmost' with warning", async () => { + it("should handle app_target: 'frontmost' with new frontmost mode", async () => { // Mock resolveImagePath for minimal case mockResolveImagePath.mockResolvedValue({ effectivePath: MOCK_TEMP_IMAGE_DIR, tempDirUsed: MOCK_TEMP_IMAGE_DIR, }); - const mockResponse = mockSwiftCli.captureImage("screen", {}); + const mockResponse = mockSwiftCli.captureFrontmostWindow(); mockExecuteSwiftCli.mockResolvedValue(mockResponse); - const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); + const loggerDebugSpy = vi.spyOn(mockLogger, "debug"); await imageToolHandler( { app_target: "frontmost" }, mockContext, ); - expect(loggerWarnSpy).toHaveBeenCalledWith( - "'frontmost' target requires determining current frontmost app, defaulting to screen mode", + expect(loggerDebugSpy).toHaveBeenCalledWith( + "Using frontmost mode - will attempt to capture frontmost window", ); expect(mockExecuteSwiftCli).toHaveBeenCalledWith( - expect.arrayContaining(["--mode", "screen"]), + expect.arrayContaining(["--mode", "frontmost"]), mockLogger, expect.objectContaining({ timeout: expect.any(Number) }) ); @@ -1035,33 +1035,33 @@ describe("Image Tool", () => { }); it("should handle app_target: 'frontmost'", () => { - const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); + const loggerDebugSpy = vi.spyOn(mockLogger, "debug"); const args = buildSwiftCliArgs({ app_target: "frontmost" }, undefined, undefined, mockLogger); expect(args).toEqual( - expect.arrayContaining(["--mode", "screen"]), + expect.arrayContaining(["--mode", "frontmost"]), ); expect(args).not.toContain("--app"); - expect(loggerWarnSpy).toHaveBeenCalled(); + expect(loggerDebugSpy).toHaveBeenCalledWith("Using frontmost mode - will attempt to capture frontmost window"); }); it("should handle app_target: 'frontmost' case-insensitively", () => { - const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); + const loggerDebugSpy = vi.spyOn(mockLogger, "debug"); // Test uppercase const argsUpper = buildSwiftCliArgs({ app_target: "FRONTMOST" }, undefined, undefined, mockLogger); expect(argsUpper).toEqual( - expect.arrayContaining(["--mode", "screen"]), + expect.arrayContaining(["--mode", "frontmost"]), ); expect(argsUpper).not.toContain("--app"); // Test mixed case const argsMixed = buildSwiftCliArgs({ app_target: "Frontmost" }, undefined, undefined, mockLogger); expect(argsMixed).toEqual( - expect.arrayContaining(["--mode", "screen"]), + expect.arrayContaining(["--mode", "frontmost"]), ); expect(argsMixed).not.toContain("--app"); - expect(loggerWarnSpy).toHaveBeenCalledTimes(2); + expect(loggerDebugSpy).toHaveBeenCalledTimes(2); }); it("should handle window specifiers case-insensitively", () => {