diff --git a/CHANGELOG.md b/CHANGELOG.md index 0915018..bc95305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- **PID-based application targeting**: You can now target applications by their Process ID using the `PID:XXXX` syntax + - Works with both `image` and `list` tools + - Example: `app_target: "PID:663"` to capture windows from process 663 + - Provides clear error messages for invalid PIDs or non-existent processes + - Useful for targeting specific instances when multiple copies of an app are running + ## [1.0.1] - 2025-01-08 ### Fixed diff --git a/README.md b/README.md index 0122bf4..9e3e2bc 100644 --- a/README.md +++ b/README.md @@ -319,6 +319,12 @@ await use_mcp_tool("peekaboo", "image", { app_target: "frontmost", format: "png" }); + +// Capture by Process ID (useful for multiple instances) +await use_mcp_tool("peekaboo", "image", { + app_target: "PID:663", + path: "~/Desktop/process.png" +}); ``` #### Browser Helper Filtering @@ -360,6 +366,12 @@ await use_mcp_tool("peekaboo", "list", { app: "Preview" }); +// List windows by Process ID +await use_mcp_tool("peekaboo", "list", { + item_type: "application_windows", + app: "PID:663" +}); + // Check server status await use_mcp_tool("peekaboo", "list", { item_type: "server_status" @@ -564,8 +576,9 @@ Captures macOS screen content and optionally analyzes it. Window shadows/frames * `app_target` (string, optional): Specifies the capture target. If omitted or empty, captures all screens. * Examples: * `"screen:INDEX"`: Captures the screen at the specified zero-based index (e.g., `"screen:0"`). (Note: Index selection from multiple screens is planned for full support in the Swift CLI). - * `"frontmost"`: Aims to capture all windows of the current foreground application. 
(Note: This is a complex scenario; current implementation may default to screen capture if the exact foreground app cannot be reliably determined by the Node.js layer alone). + * `"frontmost"`: Captures the frontmost window of the currently active application. * `"AppName"`: Captures all windows of the application named `AppName` (e.g., `"Safari"`, `"com.apple.Safari"`). Fuzzy matching is used. + * `"PID:ProcessID"`: Captures all windows of the application with the specified process ID (e.g., `"PID:663"`). Useful when multiple instances of the same app are running. * `"AppName:WINDOW_TITLE:Title"`: Captures the window of `AppName` that has the specified `Title` (e.g., `"Notes:WINDOW_TITLE:My Important Note"`). * `"AppName:WINDOW_INDEX:Index"`: Captures the window of `AppName` at the specified zero-based `Index` (e.g., `"Preview:WINDOW_INDEX:0"` for the frontmost window of Preview). * `path` (string, optional): Base absolute path for saving the captured image(s). If `format` is `"data"` and `path` is also provided, the image is saved to this path (as a PNG) AND Base64 data is returned. If a `question` is provided and `path` is omitted, a temporary path is used for capture, and the file is deleted after analysis. @@ -604,9 +617,10 @@ For detailed parameter documentation, see [docs/spec.md](./docs/spec.md). 
- **Permission checking**: Automatic verification of required permissions ### Window Management -- **Application listing**: Complete list of running applications +- **Application listing**: Complete list of running applications with PIDs - **Window enumeration**: List all windows for specific apps -- **Flexible matching**: Find apps by partial name, bundle ID, or PID +- **Flexible matching**: Find apps by partial name, bundle ID, or Process ID +- **PID targeting**: Target specific processes using `PID:XXX` syntax - **Status monitoring**: Active/inactive status, window counts ### AI Integration diff --git a/docs/spec.md b/docs/spec.md index 061043f..ed242e7 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -132,6 +132,7 @@ Configured AI Providers (from PEEKABOO_AI_PROVIDERS ENV): 1 }) else { + // No multiple instances found, skip test + return + } + + // Pick the first instance + let targetApp = apps[0] + let pid = targetApp.processIdentifier + + // Create image command with specific PID + var command = ImageCommand() + command.app = "PID:\(pid)" + command.mode = .multi + command.format = .png + command.path = NSTemporaryDirectory() + command.jsonOutput = true + + do { + let result = try await captureWithPID(command: command, targetPID: pid) + + #expect(result.success == true) + // Since we're mocking, we know data contains windows from specific PID + #expect(result.data != nil) + } catch { + Issue.record("Failed to capture specific instance by PID: \(error)") + } + } + + @Test("Invalid PID formats in image capture") + func invalidPIDFormatsInImageCapture() throws { + let invalidPIDs = [ + "PID:", // Missing PID number + "PID:abc", // Non-numeric PID + "PID:-123", // Negative PID + "PID:12.34", // Decimal PID + "PID:0", // Zero PID + "PID:999999999" // Very large PID + ] + + for invalidPID in invalidPIDs { + var command = ImageCommand() + command.app = invalidPID + command.mode = .window + command.format = .png + command.jsonOutput = true + + // The command should 
parse but fail during execution + #expect(command.app == invalidPID) + + // In actual execution, this would fail with APP_NOT_FOUND error + // Here we just verify the command accepts the PID format + } + } + + @Test("PID targeting with window specifiers") + func pidTargetingWithWindowSpecifiers() throws { + // Test that PID can be combined with window index + var command1 = ImageCommand() + command1.app = "PID:1234" + command1.windowIndex = 0 + command1.mode = .window + + #expect(command1.app == "PID:1234") + #expect(command1.windowIndex == 0) + + // Test that PID can be combined with window title + var command2 = ImageCommand() + command2.app = "PID:5678" + command2.windowTitle = "Document" + command2.mode = .window + + #expect(command2.app == "PID:5678") + #expect(command2.windowTitle == "Document") + } + + @Test("PID targeting filename generation") + func pidTargetingFilenameGeneration() throws { + // Test that filenames include PID information + let pid: pid_t = 1234 + let appName = "TestApp" + let timestamp = "20250608_120000" + + // Expected filename format for PID capture + let expectedFilename = "\(appName)_PID_\(pid)_\(timestamp).png" + + // Verify filename pattern + #expect(expectedFilename.contains("PID")) + #expect(expectedFilename.contains(String(pid))) + #expect(expectedFilename.contains(appName)) + } + + // Helper function to simulate capture with PID + private func captureWithPID(command: ImageCommand, targetPID: pid_t) async throws -> JSONResponse { + // In real execution, this would use WindowCapture.captureWindows + // For testing, we simulate the response + + guard let app = NSRunningApplication(processIdentifier: targetPID) else { + throw ApplicationError.notFound("No application found with PID: \(targetPID)") + } + + let savedFile = SavedFile( + path: URL(fileURLWithPath: command.path ?? NSTemporaryDirectory()).appendingPathComponent("\(app.localizedName ?? "Unknown")_PID_\(targetPID).png").path, // URL join: no doubled "/" when the directory already ends in one + item_label: app.localizedName ?? 
"Unknown", + window_title: nil, + window_id: nil, + window_index: nil, + mime_type: "image/png" + ) + + let captureData = ImageCaptureData(saved_files: [savedFile]) + + return JSONResponse( + success: true, + data: captureData, + messages: ["Captured windows for PID: \(targetPID)"], + debugLogs: [], + error: nil + ) + } +} \ No newline at end of file diff --git a/peekaboo-cli/Tests/peekabooTests/PIDTargetingTests.swift b/peekaboo-cli/Tests/peekabooTests/PIDTargetingTests.swift new file mode 100644 index 0000000..27fa7b0 --- /dev/null +++ b/peekaboo-cli/Tests/peekabooTests/PIDTargetingTests.swift @@ -0,0 +1,64 @@ +import Foundation +import AppKit +import Testing +@testable import peekaboo + +@Suite("PID Targeting Tests") +struct PIDTargetingTests { + @Test("Find application by valid PID", .enabled(if: ProcessInfo.processInfo.environment["CI"] == nil)) + func findByValidPID() throws { + + // Get any running application + let runningApps = NSWorkspace.shared.runningApplications + guard let testApp = runningApps.first(where: { $0.localizedName != nil }) else { + Issue.record("No running applications found for testing") + return + } + + let pid = testApp.processIdentifier + let identifier = "PID:\(pid)" + + do { + let foundApp = try ApplicationFinder.findApplication(identifier: identifier) + #expect(foundApp.processIdentifier == pid) + #expect(foundApp.bundleIdentifier == testApp.bundleIdentifier) + } catch { + Issue.record("Failed to find application by PID: \(error)") + } + } + + @Test("Invalid PID format throws error") + func invalidPIDFormat() throws { + // Test various invalid PID formats + let invalidPIDs = [ + "PID:", // Missing PID number + "PID:abc", // Non-numeric PID + "PID:-123", // Negative PID + "PID:12.34", // Decimal PID + "PID:999999999" // Very large PID (likely non-existent) + ] + + for invalidPID in invalidPIDs { + #expect(throws: ApplicationError.self) { + _ = try ApplicationFinder.findApplication(identifier: invalidPID) + } + } + } + + 
@Test("Non-existent PID throws notFound error") + func nonExistentPID() throws { + // Use a very high PID number that's unlikely to exist + let identifier = "PID:99999" + + do { + _ = try ApplicationFinder.findApplication(identifier: identifier) + Issue.record("Expected error for non-existent PID") + } catch ApplicationError.notFound(let message) { + // The message should contain information about the PID + #expect(message.contains("99999") || message == identifier, + "Error message '\(message)' should mention PID 99999") + } catch { + Issue.record("Unexpected error: \(error)") + } + } +} \ No newline at end of file diff --git a/peekaboo-cli/Tests/peekabooTests/PIDWindowsSubcommandTests.swift b/peekaboo-cli/Tests/peekabooTests/PIDWindowsSubcommandTests.swift new file mode 100644 index 0000000..2932e68 --- /dev/null +++ b/peekaboo-cli/Tests/peekabooTests/PIDWindowsSubcommandTests.swift @@ -0,0 +1,117 @@ +import Foundation +import AppKit +import Testing +import ArgumentParser +@testable import peekaboo + +@Suite("PID Windows Subcommand Tests") +struct PIDWindowsSubcommandTests { + @Test("Parse windows subcommand with PID") + func parseWindowsSubcommandWithPID() throws { + // Test parsing windows subcommand with PID + let command = try WindowsSubcommand.parse([ + "--app", "PID:1234", + "--json-output" + ]) + + #expect(command.app == "PID:1234") + #expect(command.jsonOutput == true) + } + + @Test("Parse windows subcommand with PID and details") + func parseWindowsSubcommandWithPIDAndDetails() throws { + // Test windows subcommand with PID and window details + let command = try WindowsSubcommand.parse([ + "--app", "PID:5678", + "--include-details", "ids,bounds,off_screen", + "--json-output" + ]) + + #expect(command.app == "PID:5678") + #expect(command.includeDetails == "ids,bounds,off_screen") + #expect(command.jsonOutput == true) + } + + @Test("Various PID formats in windows subcommand") + func variousPIDFormatsInWindowsSubcommand() throws { + let pidFormats = [ + 
"PID:1", // Single digit + "PID:123", // Three digits + "PID:99999", // Large PID + ] + + for pidFormat in pidFormats { + let command = try WindowsSubcommand.parse([ + "--app", pidFormat + ]) + + #expect(command.app == pidFormat) + } + } + + @Test("ApplicationInfo includes PID") + func applicationInfoIncludesPID() throws { + // Verify that ApplicationInfo includes PID + let appInfo = ApplicationInfo( + app_name: "TestApp", + bundle_id: "com.test.app", + pid: 1234, + is_active: false, + window_count: 2 + ) + + #expect(appInfo.pid == 1234) + #expect(appInfo.app_name == "TestApp") + + // Test JSON encoding includes PID + let encoder = JSONEncoder() + let data = try encoder.encode(appInfo) + let json = String(data: data, encoding: .utf8) ?? "" + + #expect(json.contains("\"pid\":1234")) + } + + @Test("TargetApplicationInfo includes PID") + func targetApplicationInfoIncludesPID() throws { + // Test that window list response includes target app PID + let targetAppInfo = TargetApplicationInfo( + app_name: "Safari", + bundle_id: "com.apple.Safari", + pid: 5678 + ) + + #expect(targetAppInfo.pid == 5678) + + // Test JSON encoding + let encoder = JSONEncoder() + let data = try encoder.encode(targetAppInfo) + let json = String(data: data, encoding: .utf8) ?? 
"" + + #expect(json.contains("\"pid\":5678")) + } + + @Test("WindowListData structure with PID") + func windowListDataStructureWithPID() throws { + let targetAppInfo = TargetApplicationInfo( + app_name: "Terminal", + bundle_id: "com.apple.Terminal", + pid: 9999 + ) + + let windowInfo = WindowInfo( + window_title: "~/Projects", + window_id: 456, + window_index: 0, + bounds: nil, + is_on_screen: true + ) + + let windowListData = WindowListData( + windows: [windowInfo], + target_application_info: targetAppInfo + ) + + #expect(windowListData.target_application_info.pid == 9999) + #expect(windowListData.windows.count == 1) + } +} \ No newline at end of file diff --git a/src/tools/list.ts b/src/tools/list.ts index c688cab..97dcae6 100644 --- a/src/tools/list.ts +++ b/src/tools/list.ts @@ -43,8 +43,8 @@ export const listToolSchema = z .optional() .describe( "Required when `item_type` is `application_windows`. " + - "Specifies the target application by its name (e.g., \"Safari\", \"TextEdit\") or bundle ID. " + - "Fuzzy matching is used, so partial names may work.", + "Specifies the target application by its name (e.g., \"Safari\", \"TextEdit\"), bundle ID, or process ID (e.g., \"PID:663\"). 
" + + "Fuzzy matching is used for names, so partial names may work.", ), include_window_details: z.preprocess( (val) => { diff --git a/src/types/index.ts b/src/types/index.ts index 98b5ce9..0143cf8 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -121,6 +121,7 @@ export const imageToolSchema = z.object({ "Use `'screen:INDEX'` (e.g., `'screen:0'`) for a specific display.\n" + "Use `'frontmost'` for all windows of the current foreground application.\n" + "Use `'AppName'` (e.g., `'Safari'`) for all windows of that application.\n" + + "Use `'PID:PROCESS_ID'` (e.g., `'PID:663'`) to target a specific process by its PID.\n" + "Use `'AppName:WINDOW_TITLE:Title'` (e.g., `'TextEdit:WINDOW_TITLE:My Notes'`) for a window of 'AppName' matching that title.\n" + "Use `'AppName:WINDOW_INDEX:Index'` (e.g., `'Preview:WINDOW_INDEX:0'`) for a window of 'AppName' at that index.\n" + "Ensure components are correctly colon-separated.", diff --git a/tests/unit/tools/pid-targeting.test.ts b/tests/unit/tools/pid-targeting.test.ts new file mode 100644 index 0000000..581f452 --- /dev/null +++ b/tests/unit/tools/pid-targeting.test.ts @@ -0,0 +1,138 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { imageToolHandler } from "../../../src/tools/image"; +import * as peekabooCliModule from "../../../src/utils/peekaboo-cli"; +import type { SwiftCliResponse } from "../../../src/types"; +import type { ToolContext } from "@modelcontextprotocol/sdk/types"; +import pino from "pino"; + +// Mock the peekaboo-cli module +vi.mock("../../../src/utils/peekaboo-cli"); + +// Create a mock context +const mockContext: ToolContext = { + logger: pino({ level: "silent" }), +}; + +describe("PID Targeting Tests", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should handle PID targeting correctly", async () => { + const mockResponse: SwiftCliResponse = { + success: true, + data: { + saved_files: [ + { + path: "/tmp/test_PID_663.png", + item_label: "Ghostty", + 
mime_type: "image/png", + }, + ], + }, + }; + + vi.mocked(peekabooCliModule.executeSwiftCli).mockResolvedValue(mockResponse); + + const result = await imageToolHandler( + { + app_target: "PID:663", + path: "/tmp/test.png", + }, + mockContext, + ); + + expect(result.content).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: "text", + text: expect.stringContaining("Captured 1 image"), + }), + ]), + ); + expect(result.saved_files).toHaveLength(1); + expect(result.saved_files![0].path).toBe("/tmp/test_PID_663.png"); + }); + + it("should handle invalid PID format", async () => { + const mockResponse: SwiftCliResponse = { + success: false, + error: { + code: "APP_NOT_FOUND", + message: "Invalid PID format: PID:abc", + }, + }; + + vi.mocked(peekabooCliModule.executeSwiftCli).mockResolvedValue(mockResponse); + + const result = await imageToolHandler( + { + app_target: "PID:abc", + }, + mockContext, + ); + + expect(result.isError).toBe(true); + expect(result.content[0]).toMatchObject({ + type: "text", + text: expect.stringContaining("Invalid PID format"), + }); + }); + + it("should handle non-existent PID", async () => { + const mockResponse: SwiftCliResponse = { + success: false, + error: { + code: "APP_NOT_FOUND", + message: "No application found with PID: 99999", + }, + }; + + vi.mocked(peekabooCliModule.executeSwiftCli).mockResolvedValue(mockResponse); + + const result = await imageToolHandler( + { + app_target: "PID:99999", + }, + mockContext, + ); + + expect(result.isError).toBe(true); + expect(result.content[0]).toMatchObject({ + type: "text", + text: expect.stringContaining("No application found with PID"), + }); + }); + + it("should pass PID targeting to Swift CLI correctly", async () => { + const mockResponse: SwiftCliResponse = { + success: true, + data: { + saved_files: [ // same payload key as the success-case mock in this file + { + path: "/tmp/test.png", + item_label: "Some App", + mime_type: "image/png", + }, + ], + }, + }; + + 
vi.mocked(peekabooCliModule.executeSwiftCli).mockResolvedValue(mockResponse); + + await imageToolHandler( + { + app_target: "PID:1234", + path: "/tmp/test.png", + }, + mockContext, + ); + + // Verify the Swift CLI was called with the PID target + expect(peekabooCliModule.executeSwiftCli).toHaveBeenCalledWith( + expect.arrayContaining(["image", "--app", "PID:1234"]), + expect.anything(), + expect.anything(), + ); + }); +}); \ No newline at end of file