diff --git a/CHANGELOG.md b/CHANGELOG.md index f4e6190..b80a89f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Restriction on using `format: "data"` for screen captures to prevent JavaScript stack overflow errors + - Screen captures must use `format: "png"` or omit the format parameter + - Application window captures can still use `format: "data"` + ## [1.0.0-beta.18] - 2025-06-08 ### Added diff --git a/README.md b/README.md index 9934054..4330f48 100644 --- a/README.md +++ b/README.md @@ -289,15 +289,17 @@ Peekaboo provides three main tools for AI agents: Captures macOS screen content with automatic shadow/frame removal. +**Important:** Screen captures cannot use `format: "data"` due to the large size of screen images causing JavaScript stack overflow errors. Always use `format: "png"` (or omit format) with a `path` for screen captures. + **Examples:** ```javascript -// Capture entire screen +// Capture entire screen (must save to file) await use_mcp_tool("peekaboo", "image", { app_target: "screen:0", path: "~/Desktop/screenshot.png" }); -// Capture specific app window with analysis +// Capture specific app window with analysis (can use format: "data") await use_mcp_tool("peekaboo", "image", { app_target: "Safari", question: "What website is currently open?", diff --git a/peekaboo-cli/Sources/peekaboo/Version.swift b/peekaboo-cli/Sources/peekaboo/Version.swift index d531b5e..f4521f9 100644 --- a/peekaboo-cli/Sources/peekaboo/Version.swift +++ b/peekaboo-cli/Sources/peekaboo/Version.swift @@ -1,4 +1,4 @@ // This file is auto-generated by the build script. Do not edit manually. enum Version { - static let current = "1.0.0-beta.17" + static let current = "1.0.0-beta.18" } diff --git a/src/tools/image.ts b/src/tools/image.ts index a82baf5..38e3f09 100644 --- a/src/tools/image.ts +++ b/src/tools/image.ts @@ -28,6 +28,23 @@ export async function imageToolHandler( try { logger.debug({ input }, "Processing peekaboo.image tool call"); + // Validate format restrictions for screen captures + const isScreenCapture = !input.app_target || input.app_target.startsWith("screen:"); + if (isScreenCapture && input.format === "data") { + logger.warn("Screen capture with format 'data' is not allowed due to size constraints"); + return { + content: [ + { + type: "text", + text: "Screen captures cannot use format 'data' because they produce images too large for base64 encoding. " + + "Please use format 'png' to save to a file instead.", + }, + ], + isError: true, + _meta: { backend_error_code: "FORMAT_NOT_ALLOWED_FOR_SCREEN" }, + }; + } + // Determine effective path and format for Swift CLI const swiftFormat = input.format === "data" ? "png" : (input.format || "png"); diff --git a/tests/unit/tools/image.test.ts b/tests/unit/tools/image.test.ts index 2530a3c..949a0e2 100644 --- a/tests/unit/tools/image.test.ts +++ b/tests/unit/tools/image.test.ts @@ -109,14 +109,26 @@ describe("Image Tool", () => { expect(mockFsRm).not.toHaveBeenCalled(); }); - it("should capture screen with format: 'data'", async () => { + it("should reject screen capture with format: 'data'", async () => { + const result = await imageToolHandler( + { format: "data" }, + mockContext, + ); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain("Screen captures cannot use format 'data'"); + expect(result._meta?.backend_error_code).toBe("FORMAT_NOT_ALLOWED_FOR_SCREEN"); + expect(mockExecuteSwiftCli).not.toHaveBeenCalled(); + }); + + it("should allow app capture with format: 'data'", async () => { // Mock resolveImagePath to return a temp directory for format: "data" mockResolveImagePath.mockResolvedValue({ effectivePath: MOCK_TEMP_IMAGE_DIR, tempDirUsed: MOCK_TEMP_IMAGE_DIR, }); - const mockResponse = mockSwiftCli.captureImage("screen", { + const mockResponse = mockSwiftCli.captureImage("Safari", { path: MOCK_SAVED_FILE_PATH, format: "png", }); @@ -124,7 +136,7 @@ describe("Image Tool", () => { mockReadImageAsBase64.mockResolvedValue("base64imagedata"); const result = await imageToolHandler( - { format: "data" }, + { app_target: "Safari", format: "data" }, mockContext, ); @@ -140,7 +152,7 @@ describe("Image Tool", () => { expect(mockFsRm).not.toHaveBeenCalled(); }); - it("should save file and return base64 when format: 'data' with path", async () => { + it("should save file and return base64 when format: 'data' with path for app capture", async () => { const userPath = "/user/test.png"; // Mock resolveImagePath to return the user path (no temp dir) mockResolveImagePath.mockResolvedValue({ @@ -151,7 +163,7 @@ describe("Image Tool", () => { const mockSavedFile: SavedFile = { path: userPath, mime_type: "image/png", - item_label: "Screen 1", + item_label: "Safari", }; const mockResponse = { success: true, @@ -162,7 +174,7 @@ describe("Image Tool", () => { mockReadImageAsBase64.mockResolvedValue("base64imagedata"); const result = await imageToolHandler( - { format: "data", path: userPath }, + { app_target: "Safari", format: "data", path: userPath }, mockContext, ); @@ -644,7 +656,7 @@ describe("Image Tool", () => { tempDirUsed: MOCK_TEMP_IMAGE_DIR, }); - const mockCliResponse = mockSwiftCli.captureImage("screen", { + const mockCliResponse = mockSwiftCli.captureImage("Safari", { path: MOCK_SAVED_FILE_PATH, format: "png", }); @@ -652,6 +664,7 @@ describe("Image Tool", () => { const result = await imageToolHandler( { + app_target: "Safari", // Use app capture to allow format: "data" question: MOCK_QUESTION, format: "data", // Even with format: "data" },