diff --git a/CHANGELOG.md b/CHANGELOG.md index 7033824..6374907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- New "auto" capture focus mode for the `image` tool, which intelligently brings windows to the foreground only when needed. If a target window is already active, screenshots are taken immediately. If the window is in the background, it's automatically brought to the foreground first. This provides the optimal user experience by making screenshots "just work" in most scenarios. + +### Changed +- The default `capture_focus` behavior for the `image` tool has changed from "background" to "auto". This ensures better screenshot success rates while maintaining efficiency by only activating windows when necessary. + +## [1.0.0-beta.21] - 2025-01-10 + +### Fixed +- The `list` tool no longer returns a generic "unknown error" when a non-existent `app` is specified. It now returns a clear error message: `"List operation failed: The specified application ('AppName') is not running or could not be found."`, improving usability and error diagnosis. + +## [1.0.0-beta.20] - 2025-01-09 + +### Changed +- Improved error message for the `image` tool. When an `app_target` is specified for a running application that has no visible windows, the tool now returns a specific error (`"Image capture failed: The 'AppName' process is running, but it has no capturable windows..."`) instead of a generic "window not found" error. This provides clearer feedback and suggests using `capture_focus: 'foreground'` as a remedy. + +## [1.0.0-beta.19] - 2025-01-08 + +### Changed +- The `image` tool's behavior has been updated. When a `question` is provided for analysis and no `path` is specified, the tool now preserves the captured image(s) in their temporary directory instead of deleting them. The paths to these saved files are now correctly returned in the `saved_files` array, making them accessible after the tool run completes. + ## [1.0.0-beta.18] - 2025-01-08 ### Fixed @@ -222,25 +243,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Replaced `app`, `mode`, and `window_specifier` parameters with a single `app_target` string (e.g., `"AppName"`, `"AppName:WINDOW_TITLE:Title"`, `"screen:0"`). - `format` parameter now includes `"data"` option to return Base64 PNG data directly. If `path` is also given with `format: "data"`, file is saved (as PNG) AND data is returned. - If `path` is omitted, `image` tool now defaults to `format: "data"` behavior (returns Base64 PNG data). - - `return_data` parameter removed (behavior now implied by `format` and `path`). - - `provider_config` parameter removed. AI provider for analysis (when `question` is supplied) is now automatically selected from `PEEKABOO_AI_PROVIDERS` environment variable. -- **Node.js `imageToolHandler` and `buildSwiftCliArgs`:** Refactored to support the new `image` tool API and `--screen-index`. -- **Tests:** Unit and Integration tests for the `image` tool were extensively updated to reflect the API changes and new functionalities. - -### 🐛 Fixed - -- Addressed an issue in `src/tools/image.ts` where `logger.debug()` could be called without checking for logger existence (relevant for `buildSwiftCliArgs` if called in an unexpected context, though typically safe). - -### Added -- Add support for `PEEKABOO_DEFAULT_SAVE_PATH` to specify a default directory for saving images. -- The `list` tool now includes a `server_status` item type to retrieve server version and AI provider configuration. -- The server status is now appended to all tool descriptions on `ListToolsRequest`. -- Added comprehensive logging with `pino`, with log level and file path configurable via `PEEKABOO_LOG_LEVEL` and `PEEKABOO_LOG_FILE`. - -### Changed -- The `image` tool's `path` argument is now optional. If omitted, the `format` defaults to `"data"` and the image is returned as a Base64 string. -- If an `image` capture is followed by a `question` for analysis and no `path` is given, a temporary file is created and deleted after analysis. - -### Fixed -- Resolved an issue where the server could crash if the log file directory was not writable. It now falls back to a temporary directory. -- Ensured the `peekaboo` Swift CLI binary is correctly located when the package is installed globally or used via `npx`. \ No newline at end of file + - ` \ No newline at end of file diff --git a/docs/spec.md b/docs/spec.md index 367b6ba..29cc70e 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -132,7 +132,7 @@ Configured AI Providers (from PEEKABOO_AI_PROVIDERS ENV): [SavedFile] { let targetApp = try ApplicationFinder.findApplication(identifier: appIdentifier) - if captureFocus == .foreground { + if captureFocus == .foreground || (captureFocus == .auto && !targetApp.isActive) { try PermissionsChecker.requireAccessibilityPermission() targetApp.activate() Thread.sleep(forTimeInterval: 0.2) // Brief delay for activation @@ -262,7 +262,7 @@ struct ImageCommand: ParsableCommand { private func captureAllApplicationWindows(_ appIdentifier: String) throws -> [SavedFile] { let targetApp = try ApplicationFinder.findApplication(identifier: appIdentifier) - if captureFocus == .foreground { + if captureFocus == .foreground || (captureFocus == .auto && !targetApp.isActive) { try PermissionsChecker.requireAccessibilityPermission() targetApp.activate() Thread.sleep(forTimeInterval: 0.2) diff --git a/peekaboo-cli/Sources/peekaboo/ListCommand.swift b/peekaboo-cli/Sources/peekaboo/ListCommand.swift index 6102be0..427ac22 100644 --- a/peekaboo-cli/Sources/peekaboo/ListCommand.swift +++ b/peekaboo-cli/Sources/peekaboo/ListCommand.swift @@ -43,6 +43,13 @@ struct AppsSubcommand: ParsableCommand { private func handleError(_ error: Error) { let captureError: CaptureError = if let err = error as? CaptureError { err + } else if let appError = error as? ApplicationError { + switch appError { + case .notFound(let identifier): + .appNotFound(identifier) + case .ambiguous(let identifier, _): + .invalidArgument("Ambiguous application identifier: '\(identifier)'") + } } else { .unknownError(error.localizedDescription) } @@ -142,6 +149,13 @@ struct WindowsSubcommand: ParsableCommand { private func handleError(_ error: Error) { let captureError: CaptureError = if let err = error as? CaptureError { err + } else if let appError = error as? ApplicationError { + switch appError { + case .notFound(let identifier): + .appNotFound(identifier) + case .ambiguous(let identifier, _): + .invalidArgument("Ambiguous application identifier: '\(identifier)'") + } } else { .unknownError(error.localizedDescription) } diff --git a/peekaboo-cli/Sources/peekaboo/Models.swift b/peekaboo-cli/Sources/peekaboo/Models.swift index bc33452..0f967bc 100644 --- a/peekaboo-cli/Sources/peekaboo/Models.swift +++ b/peekaboo-cli/Sources/peekaboo/Models.swift @@ -29,6 +29,7 @@ enum ImageFormat: String, CaseIterable, ExpressibleByArgument { enum CaptureFocus: String, CaseIterable, ExpressibleByArgument { case background + case auto case foreground } diff --git a/peekaboo-cli/Sources/peekaboo/Version.swift b/peekaboo-cli/Sources/peekaboo/Version.swift index fb95bee..897261a 100644 --- a/peekaboo-cli/Sources/peekaboo/Version.swift +++ b/peekaboo-cli/Sources/peekaboo/Version.swift @@ -1,4 +1,4 @@ // This file is auto-generated by the build script. Do not edit manually. enum Version { - static let current = "1.0.0-beta.15" + static let current = "1.0.0-beta.16" } diff --git a/peekaboo-cli/Tests/peekabooTests/ImageCaptureLogicTests.swift b/peekaboo-cli/Tests/peekabooTests/ImageCaptureLogicTests.swift index f957118..31dbbba 100644 --- a/peekaboo-cli/Tests/peekabooTests/ImageCaptureLogicTests.swift +++ b/peekaboo-cli/Tests/peekabooTests/ImageCaptureLogicTests.swift @@ -153,14 +153,18 @@ struct ImageCaptureLogicTests { @Test("Capture focus modes", .tags(.fast)) func captureFocusModes() throws { - // Default background mode + // Default auto mode let defaultCmd = try ImageCommand.parse([]) - #expect(defaultCmd.captureFocus == .background) + #expect(defaultCmd.captureFocus == .auto) // Explicit background mode let backgroundCmd = try ImageCommand.parse(["--capture-focus", "background"]) #expect(backgroundCmd.captureFocus == .background) + // Auto mode + let autoCmd = try ImageCommand.parse(["--capture-focus", "auto"]) + #expect(autoCmd.captureFocus == .auto) + // Foreground mode let foregroundCmd = try ImageCommand.parse(["--capture-focus", "foreground"]) #expect(foregroundCmd.captureFocus == .foreground) @@ -411,12 +415,12 @@ struct AdvancedImageCaptureLogicTests { ]) #expect(foregroundWindow.captureFocus == .foreground) - // Background focus (default) should work without additional permissions - let backgroundCapture = try ImageCommand.parse([ + // Auto focus (default) should work intelligently + let autoCapture = try ImageCommand.parse([ "--mode", "window", "--app", "Finder" ]) - #expect(backgroundCapture.captureFocus == .background) + #expect(autoCapture.captureFocus == .auto) } @Test("Path handling edge cases", .tags(.fast)) @@ -452,7 +456,7 @@ struct AdvancedImageCaptureLogicTests { if scenario.shouldBeReady { // Verify basic readiness #expect(command.format == .png) - #expect(command.captureFocus == .background) + #expect(command.captureFocus == .auto) } } catch { if scenario.shouldBeReady { diff --git a/peekaboo-cli/Tests/peekabooTests/ImageCommandTests.swift b/peekaboo-cli/Tests/peekabooTests/ImageCommandTests.swift index a3be3e6..8fd9f17 100644 --- a/peekaboo-cli/Tests/peekabooTests/ImageCommandTests.swift +++ b/peekaboo-cli/Tests/peekabooTests/ImageCommandTests.swift @@ -27,7 +27,7 @@ struct ImageCommandTests { #expect(command.format == .png) #expect(command.path == nil) #expect(command.app == nil) - #expect(command.captureFocus == .background) + #expect(command.captureFocus == .auto) #expect(command.jsonOutput == false) } @@ -250,7 +250,7 @@ struct ImageCommandTests { #expect(command.windowTitle == nil) #expect(command.windowIndex == nil) #expect(command.screenIndex == nil) - #expect(command.captureFocus == .background) + #expect(command.captureFocus == .auto) #expect(command.jsonOutput == false) } diff --git a/peekaboo-cli/Tests/peekabooTests/ModelsTests.swift b/peekaboo-cli/Tests/peekabooTests/ModelsTests.swift index a3a9601..4ee047d 100644 --- a/peekaboo-cli/Tests/peekabooTests/ModelsTests.swift +++ b/peekaboo-cli/Tests/peekabooTests/ModelsTests.swift @@ -36,10 +36,12 @@ struct ModelsTests { func captureFocus() { // Test CaptureFocus enum values #expect(CaptureFocus.background.rawValue == "background") + #expect(CaptureFocus.auto.rawValue == "auto") #expect(CaptureFocus.foreground.rawValue == "foreground") // Test CaptureFocus from string #expect(CaptureFocus(rawValue: "background") == .background) + #expect(CaptureFocus(rawValue: "auto") == .auto) #expect(CaptureFocus(rawValue: "foreground") == .foreground) #expect(CaptureFocus(rawValue: "invalid") == nil) } @@ -78,7 +80,7 @@ struct ModelsTests { @Test("CaptureFocus raw values are valid", .tags(.fast)) func captureFocusRawValuesValid() { - let validValues = ["background", "foreground"] + let validValues = ["background", "auto", "foreground"] for rawValue in validValues { #expect(CaptureFocus(rawValue: rawValue) != nil) } diff --git a/src/tools/list.ts b/src/tools/list.ts index ac138f1..4b8ce22 100644 --- a/src/tools/list.ts +++ b/src/tools/list.ts @@ -72,7 +72,8 @@ export const listToolSchema = z .refine( (data) => data.item_type !== "server_status" || - (data.app === undefined && data.include_window_details === undefined), + (data.app === undefined && + (data.include_window_details === undefined || data.include_window_details.length === 0)), { message: "'app' and 'include_window_details' not applicable for 'server_status'.", diff --git a/src/types/index.ts b/src/types/index.ts index 774f853..549b332 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -145,12 +145,13 @@ export const imageToolSchema = z.object({ ), capture_focus: z.preprocess( (val) => (val === "" || val === null ? undefined : val), - z.enum(["background", "foreground"]) + z.enum(["background", "auto", "foreground"]) .optional() - .default("background") + .default("auto") .describe( - "Optional. Focus behavior. 'background' (default): capture without altering window focus. " + - "'foreground': bring target to front before capture." + "Optional. Focus behavior. 'auto' (default): bring target to front only if not already active. " + + "'background': capture without altering window focus. " + + "'foreground': always bring target to front before capture." ) ), }) diff --git a/src/utils/peekaboo-cli.ts b/src/utils/peekaboo-cli.ts index 0103c79..3a88331 100644 --- a/src/utils/peekaboo-cli.ts +++ b/src/utils/peekaboo-cli.ts @@ -58,10 +58,21 @@ function getInitializedSwiftCliPath(logger: Logger): string { function mapExitCodeToErrorMessage( exitCode: number, stderr: string, + command: 'image' | 'list', + appTarget?: string, ): { message: string, code: string } { const defaultMessage = stderr.trim() ? `Peekaboo CLI Error: ${stderr.trim()}` : `Swift CLI execution failed (exit code: ${exitCode})`; + + // Handle exit code 18 specially with command context + if (exitCode === 18) { + return { + message: `The specified application ('${appTarget || 'unknown'}') is not running or could not be found.`, + code: "SWIFT_CLI_APP_NOT_FOUND", + }; + } + const errorCodeMap: { [key: number]: { message: string, code: string } } = { 1: { message: "An unknown error occurred in the Swift CLI.", code: "SWIFT_CLI_UNKNOWN_ERROR" }, 7: { message: "The specified application is running but has no capturable windows. Try setting 'capture_focus' to 'foreground' to un-hide application windows.", code: "SWIFT_CLI_NO_WINDOWS_FOUND" }, @@ -82,10 +93,6 @@ function mapExitCodeToErrorMessage( message: "Failed to write the capture to a file. This is often a file permissions issue. Please ensure the application has permissions to write to the destination directory.", code: "SWIFT_CLI_FILE_WRITE_ERROR", }, - 18: { - message: "The specified application could not be found or is not running.", - code: "SWIFT_CLI_APP_NOT_FOUND", - }, 19: { message: "The specified window index is invalid.", code: "SWIFT_CLI_INVALID_WINDOW_INDEX" }, 20: { message: "Invalid argument provided to the Swift CLI.", code: "SWIFT_CLI_INVALID_ARGUMENT" }, }; @@ -145,7 +152,17 @@ export async function executeSwiftCli( "Swift CLI execution failed", ); - const { message, code } = mapExitCodeToErrorMessage(exitCode || 1, stderr); + // Determine command and app target from args + const command = args[0] as 'image' | 'list'; + let appTarget: string | undefined; + + // Find app target in args + const appIndex = args.indexOf('--app'); + if (appIndex !== -1 && appIndex < args.length - 1) { + appTarget = args[appIndex + 1]; + } + + const { message, code } = mapExitCodeToErrorMessage(exitCode || 1, stderr, command, appTarget); const errorDetails = stderr.trim() && stdout.trim() ? `Stdout: ${stdout.trim()}` diff --git a/tests/integration/image-tool.test.ts b/tests/integration/image-tool.test.ts index 3f28908..ac5f2b5 100644 --- a/tests/integration/image-tool.test.ts +++ b/tests/integration/image-tool.test.ts @@ -829,6 +829,31 @@ describe("Image Tool Integration Tests", () => { expect(result.isError).toBe(true); expect(result.content[0].text).toContain("Window index 999 is out of bounds for Finder"); }); + + it("should return a specific error when app is running but has no windows", async () => { + // Arrange + mockResolveImagePath.mockResolvedValue({ + effectivePath: '/mock/path', + tempDirUsed: undefined, + }); + mockExecuteSwiftCli.mockResolvedValue({ + success: false, + error: { + message: "The specified application is running but has no capturable windows. Try setting 'capture_focus' to 'foreground' to un-hide application windows.", + code: "SWIFT_CLI_NO_WINDOWS_FOUND" + }, + }); + const args = { app_target: "Xcode", capture_focus: "background" }; + + // Act + const result = await imageToolHandler(args, mockContext); + + // Assert + expect(result.isError).toBe(true); + expect(result.content[0].text).toBe( + "Image capture failed: The specified application is running but has no capturable windows. Try setting 'capture_focus' to 'foreground' to un-hide application windows." + ); + }); }); describe("Environment variable handling", () => { diff --git a/tests/integration/peekaboo-cli-integration.test.ts b/tests/integration/peekaboo-cli-integration.test.ts index 1e07033..8f85d43 100644 --- a/tests/integration/peekaboo-cli-integration.test.ts +++ b/tests/integration/peekaboo-cli-integration.test.ts @@ -189,7 +189,7 @@ describe("Swift CLI Integration Tests", () => { const firstContentItem = response.content[0] as PeekabooContentItem; // Expect the generic failure message from the handler when Swift CLI fails expect(firstContentItem.text?.toLowerCase()).toMatch( - /list operation failed: (swift cli execution failed|an unknown error occurred)/i, + /list operation failed: (swift cli execution failed|an unknown error occurred|.*could not be found)/i, ); } }, 15000); diff --git a/tests/unit/tools/image.test.ts b/tests/unit/tools/image.test.ts index ccabe8f..36e8e7f 100644 --- a/tests/unit/tools/image.test.ts +++ b/tests/unit/tools/image.test.ts @@ -1,10 +1,7 @@ -import { vi } from "vitest"; +import { describe, it, expect, beforeEach, vi } from "vitest"; import { imageToolHandler } from "../../../src/tools/image"; import { buildSwiftCliArgs, resolveImagePath } from "../../../src/utils/image-cli-args"; -import { - executeSwiftCli, - readImageAsBase64, -} from "../../../src/utils/peekaboo-cli"; +import { executeSwiftCli, readImageAsBase64 } from "../../../src/utils/peekaboo-cli"; import { mockSwiftCli } from "../../mocks/peekaboo-cli.mock"; import { pino } from "pino"; import { @@ -44,16 +41,16 @@ vi.mock("../../../src/utils/ai-providers", () => ({ analyzeImageWithProvider: vi.fn(), })); +import { performAutomaticAnalysis } from "../../../src/utils/image-analysis"; +import { parseAIProviders } from "../../../src/utils/ai-providers"; + const mockExecuteSwiftCli = executeSwiftCli as vi.MockedFunction< typeof executeSwiftCli >; const mockReadImageAsBase64 = readImageAsBase64 as vi.MockedFunction< typeof readImageAsBase64 >; -import { performAutomaticAnalysis } from "../../../src/utils/image-analysis"; const mockPerformAutomaticAnalysis = performAutomaticAnalysis as vi.MockedFunction; - -import { parseAIProviders } from "../../../src/utils/ai-providers"; const mockParseAIProviders = parseAIProviders as vi.MockedFunction; const mockFsRm = fs.rm as vi.MockedFunction; @@ -401,6 +398,48 @@ describe("Image Tool", () => { mockLogger, ); }); + + it("should handle capture_focus auto mode", async () => { + // Mock resolveImagePath for minimal case + mockResolveImagePath.mockResolvedValue({ + effectivePath: MOCK_TEMP_IMAGE_DIR, + tempDirUsed: MOCK_TEMP_IMAGE_DIR, + }); + + const mockResponse = mockSwiftCli.captureImage("screen", {}); + mockExecuteSwiftCli.mockResolvedValue(mockResponse); + + await imageToolHandler( + { capture_focus: "auto" }, + mockContext, + ); + + expect(mockExecuteSwiftCli).toHaveBeenCalledWith( + expect.arrayContaining(["--capture-focus", "auto"]), + mockLogger, + ); + }); + + it("should default to background capture_focus when not specified", async () => { + // Mock resolveImagePath for minimal case + mockResolveImagePath.mockResolvedValue({ + effectivePath: MOCK_TEMP_IMAGE_DIR, + tempDirUsed: MOCK_TEMP_IMAGE_DIR, + }); + + const mockResponse = mockSwiftCli.captureImage("screen", {}); + mockExecuteSwiftCli.mockResolvedValue(mockResponse); + + await imageToolHandler( + {}, + mockContext, + ); + + expect(mockExecuteSwiftCli).toHaveBeenCalledWith( + expect.arrayContaining(["--capture-focus", "background"]), + mockLogger, + ); + }); }); describe("imageToolHandler - Capture and Analyze", () => { @@ -826,6 +865,26 @@ describe("Image Tool", () => { it("should default to background focus when capture_focus is an empty string", () => { const args = buildSwiftCliArgs({ capture_focus: "" }, undefined); + expect(args).toEqual([ + "image", + "--mode", + "screen", + "--format", + "png", + "--capture-focus", + "background" + ]); + }); + + it("should include capture_focus auto mode", () => { + const args = buildSwiftCliArgs({ capture_focus: "auto" }, undefined); + expect(args).toEqual( + expect.arrayContaining(["--capture-focus", "auto"]), + ); + }); + + it("should default to background focus when capture_focus is not provided", () => { + const args = buildSwiftCliArgs({}, undefined); expect(args).toEqual( expect.arrayContaining(["--capture-focus", "background"]), ); diff --git a/tests/unit/tools/list.test.ts b/tests/unit/tools/list.test.ts index 65d8333..fbba125 100644 --- a/tests/unit/tools/list.test.ts +++ b/tests/unit/tools/list.test.ts @@ -270,6 +270,28 @@ describe("List Tool", () => { expect((result as any)._meta.backend_error_code).toBe("APP_NOT_FOUND"); }); + it("should return a specific error if the app is not found", async () => { + // Arrange + mockExecuteSwiftCli.mockResolvedValue({ + success: false, + error: { + message: "The specified application ('Ciursor') is not running or could not be found.", + code: "SWIFT_CLI_APP_NOT_FOUND", + details: "Error: Application with name 'Ciursor' not found.", + }, + }); + const args = { item_type: "application_windows", app: "Ciursor" } as ListToolInput; + + // Act + const result = await listToolHandler(args, mockContext); + + // Assert + expect(result.isError).toBe(true); + expect(result.content[0].text).toBe( + "List operation failed: The specified application ('Ciursor') is not running or could not be found." + ); + }); + it("should handle Swift CLI errors with no message or code", async () => { mockExecuteSwiftCli.mockResolvedValue({ success: false, @@ -791,19 +813,34 @@ describe("List Tool", () => { } }); - it("should fail when item_type is 'server_status' and 'include_window_details' is provided", () => { + it("should fail when item_type is 'server_status' and 'include_window_details' has values", () => { const result = listToolSchema.safeParse({ item_type: "server_status", include_window_details: ["bounds"], }); expect(result.success).toBe(false); if (!result.success) { - expect(result.error.flatten().fieldErrors.include_window_details).toEqual([ - "'include_window_details' is only applicable when 'item_type' is 'application_windows' or when 'app' is provided.", + expect(result.error.flatten().fieldErrors.item_type).toEqual([ + "'app' and 'include_window_details' not applicable for 'server_status'.", ]); } }); + it("should succeed when item_type is 'server_status' and 'include_window_details' is empty", () => { + const result = listToolSchema.safeParse({ + item_type: "server_status", + include_window_details: [], + }); + expect(result.success).toBe(true); + }); + + it("should succeed when item_type is 'server_status' without extra parameters", () => { + const result = listToolSchema.safeParse({ + item_type: "server_status", + }); + expect(result.success).toBe(true); + }); + it("should succeed when item_type is 'application_windows' and 'include_window_details' is provided", () => { const input = { item_type: "application_windows", diff --git a/tests/unit/utils/peekaboo-cli.test.ts b/tests/unit/utils/peekaboo-cli.test.ts index 6cf906f..7c95b1e 100644 --- a/tests/unit/utils/peekaboo-cli.test.ts +++ b/tests/unit/utils/peekaboo-cli.test.ts @@ -343,7 +343,7 @@ describe("Swift CLI Utility", () => { expect(result.success).toBe(false); expect(result.error?.code).toBe("SWIFT_CLI_APP_NOT_FOUND"); expect(result.error?.message).toContain( - "The specified application could not be found", + "could not be found", ); });