diff --git a/README.md b/README.md index e3eb54f..3a6b802 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,10 @@ You can configure Peekaboo with environment variables in your Claude Desktop con "env": { "AI_PROVIDERS": "[{\"type\":\"ollama\",\"baseUrl\":\"http://localhost:11434\",\"model\":\"llava\",\"enabled\":true}]", "LOG_LEVEL": "INFO", - "PEEKABOO_LOG_FILE": "/tmp/peekaboo-mcp.log", - "PEEKABOO_DEFAULT_SAVE_PATH": "~/Pictures/Screenshots" + "LOG_FILE": "/tmp/peekaboo-mcp.log", + "DEFAULT_SAVE_PATH": "~/Pictures/Screenshots", + "CONSOLE_LOGGING": "true", + "CLI_PATH": "/usr/local/bin/peekaboo_custom" } } } @@ -74,8 +76,10 @@ You can configure Peekaboo with environment variables in your Claude Desktop con |----------|-------------|---------| | `AI_PROVIDERS` | JSON array of AI provider configurations | `[]` | | `LOG_LEVEL` | Logging level (DEBUG, INFO, WARN, ERROR) | `INFO` | -| `PEEKABOO_LOG_FILE` | Log file path | `/tmp/peekaboo-mcp.log` | -| `PEEKABOO_DEFAULT_SAVE_PATH` | Default screenshot save location | `~/Pictures/Screenshots` | +| `LOG_FILE` | Path to the server's log file. | `path.join(os.tmpdir(), 'peekaboo-mcp.log')` | +| `DEFAULT_SAVE_PATH` | Default base absolute path for saving images captured by `peekaboo.image` if not specified in the tool input. If this ENV is also not set, the Swift CLI will use its own temporary directory logic. | (none, Swift CLI uses temp paths) | +| `CONSOLE_LOGGING` | Boolean (`"true"`/`"false"`) for dev console logs. | `"false"` | +| `CLI_PATH` | Optional override for Swift `peekaboo` CLI path. | (bundled CLI) | #### AI Provider Configuration @@ -233,19 +237,46 @@ cd .. npm link ``` -Then configure Claude Desktop to use your local installation: +Then configure Claude Desktop (or a similar MCP client) to use your local installation. If you used `npm link`, the command `peekaboo-mcp` should be globally available. 
If you prefer to run directly via `node`, use the second configuration shown below. +**Example MCP Client Configuration (using local build):** + +If you ran `npm link` and `peekaboo-mcp` is in your PATH: ```json { "mcpServers": { - "peekaboo": { + "peekaboo_local": { "command": "peekaboo-mcp", - "args": [] + "args": [], + "env": { + "LOG_LEVEL": "debug", + "CONSOLE_LOGGING": "true" + } } } } ``` +Alternatively, running directly with `node`: +```json +{ + "mcpServers": { + "peekaboo_local_node": { + "command": "node", + "args": [ + "/Users/steipete/Projects/Peekaboo/dist/index.js" + ], + "env": { + "LOG_LEVEL": "debug", + "CONSOLE_LOGGING": "true" + } + } + } +} +``` +Remember to replace `/Users/steipete/Projects/Peekaboo/dist/index.js` with the actual absolute path to the `dist/index.js` in your cloned project if it differs. +Also, when using these local configurations, ensure you use a distinct key (like "peekaboo_local" or "peekaboo_local_node") in your MCP client's server list to avoid conflicts if you also have the npx-based "peekaboo" server configured. 
+ ### Using AppleScript For basic screen capture without the full MCP server, you can use the included AppleScript: @@ -279,7 +310,7 @@ For MCP clients other than Claude Desktop: Once installed, Peekaboo provides three powerful MCP tools: -### 📸 `peekaboo.image` - Screen Capture +### 📸 `image` - Screen Capture **Parameters:** - `mode`: `"screen"` | `"window"` | `"multi"` (default: "screen") @@ -289,7 +320,7 @@ Once installed, Peekaboo provides three powerful MCP tools: **Example:** ```json { - "name": "peekaboo.image", + "name": "image", "arguments": { "mode": "window", "app": "Safari" @@ -297,7 +328,7 @@ Once installed, Peekaboo provides three powerful MCP tools: } ``` -### 📋 `peekaboo.list` - Application Listing +### 📋 `list` - Application Listing **Parameters:** - `item_type`: `"running_applications"` | `"application_windows"` | `"server_status"` @@ -306,14 +337,14 @@ Once installed, Peekaboo provides three powerful MCP tools: **Example:** ```json { - "name": "peekaboo.list", + "name": "list", "arguments": { "item_type": "running_applications" } } ``` -### 🧩 `peekaboo.analyze` - AI Analysis +### 🧩 `analyze` - AI Analysis **Parameters:** - `image_path`: Absolute path to image file @@ -322,7 +353,7 @@ Once installed, Peekaboo provides three powerful MCP tools: **Example:** ```json { - "name": "peekaboo.analyze", + "name": "analyze", "arguments": { "image_path": "/tmp/screenshot.png", "question": "What applications are visible in this screenshot?" 
@@ -427,7 +458,7 @@ Peekaboo respects macOS security by: echo '{"jsonrpc": "2.0", "id": 1, "method": "tools/list"}' | node dist/index.js # Test image capture -echo '{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {"name": "peekaboo.image", "arguments": {"mode": "screen"}}}' | node dist/index.js +echo '{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {"name": "image", "arguments": {"mode": "screen"}}}' | node dist/index.js ``` ### Automated Testing diff --git a/docs/spec.md b/docs/spec.md index 59b2a42..2756397 100644 --- a/docs/spec.md +++ b/docs/spec.md @@ -9,7 +9,7 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * **NPM Package Name:** `peekaboo-mcp`. * **GitHub Project Name:** `peekaboo`. * Implements MCP server logic using the latest stable `@modelcontextprotocol/sdk`. - * Exposes three primary MCP tools: `peekaboo.image`, `peekaboo.analyze`, `peekaboo.list`. + * Exposes three primary MCP tools: `image`, `analyze`, `list`. * Translates MCP tool calls into commands for the Swift `peekaboo` CLI. * Parses structured JSON output from the Swift `peekaboo` CLI. * Handles image data preparation (reading files, Base64 encoding) for MCP responses if image data is explicitly requested by the client. @@ -60,21 +60,23 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * **Conditional Console Logging (Development Only):** If ENV VAR `PEEKABOO_MCP_CONSOLE_LOGGING="true"`, add a second Pino transport targeting `process.stderr.fd` (potentially using `pino-pretty` for human-readable output). * **Strict Rule:** All server operational logging must use the configured Pino instance. No direct `console.log/warn/error` that might output to `stdout`. 5. **Environment Variables (Read by Server):** - * `AI_PROVIDERS`: Comma-separated list of `provider_name/default_model_for_provider` pairs (e.g., `"openai/gpt-4o,ollama/qwen2.5vl:7b"`). If unset/empty, `peekaboo.analyze` tool reports AI not configured. 
+ * `AI_PROVIDERS`: Comma-separated list of `provider_name/default_model_for_provider` pairs (e.g., `"openai/gpt-4o,ollama/qwen2.5vl:7b"`). If unset/empty, `analyze` tool reports AI not configured. * `OPENAI_API_KEY`: API key for OpenAI. * `ANTHROPIC_API_KEY`: (Example for future) API key for Anthropic. * (Other cloud provider API keys as standard ENV VAR names). * `OLLAMA_BASE_URL`: Base URL for local Ollama instance. Default: `"http://localhost:11434"`. * `LOG_LEVEL`: For Pino logger. Default: `"info"`. - * `PEEKABOO_MCP_CONSOLE_LOGGING`: Boolean (`"true"`/`"false"`) for dev console logs. Default: `"false"`. - * `PEEKABOO_CLI_PATH`: Optional override for Swift `peekaboo` CLI path. + * `LOG_FILE`: Path to the server's log file. Default: `path.join(os.tmpdir(), 'peekaboo-mcp.log')`. + * `DEFAULT_SAVE_PATH`: Default base absolute path for saving images captured by `image` if not specified in the tool input. If this ENV is also not set, the Swift CLI will use its own temporary directory logic. + * `CONSOLE_LOGGING`: Boolean (`"true"`/`"false"`) for dev console logs. Default: `"false"`. + * `CLI_PATH`: Optional override for Swift `peekaboo` CLI path. 6. **Initial Status Reporting Logic:** * A server-instance-level boolean flag: `let hasSentInitialStatus = false;`. * A function `generateServerStatusString()`: Creates a formatted string: `"\n\n--- Peekaboo MCP Server Status ---\nName: PeekabooMCP\nVersion: \nConfigured AI Providers (from AI_PROVIDERS ENV): \n---"`. 
- * Response Augmentation: In the function that sends a `ToolResponse` back to the MCP client, if the response is for a successful tool call (not `initialize`/`initialized` or `peekaboo.list` with `item_type: "server_status"`) AND `hasSentInitialStatus` is `false`: + * Response Augmentation: In the function that sends a `ToolResponse` back to the MCP client, if the response is for a successful tool call (not `initialize`/`initialized` or `list` with `item_type: "server_status"`) AND `hasSentInitialStatus` is `false`: * Append `generateServerStatusString()` to the first `TextContentItem` in `ToolResponse.content`. If no text item exists, prepend a new one. * Set `hasSentInitialStatus = true`. -7. **Tool Registration:** Register `peekaboo.image`, `peekaboo.analyze`, `peekaboo.list` with their Zod input schemas and handler functions. +7. **Tool Registration:** Register `image`, `analyze`, `list` with their Zod input schemas and handler functions. 8. **Transport:** `await server.connect(new StdioServerTransport());`. 9. **Shutdown:** Implement graceful shutdown on `SIGINT`, `SIGTERM` (e.g., `await server.close(); logger.flush(); process.exit(0);`). @@ -101,7 +103,7 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * If `swiftResponse.success === true`: * Process `swiftResponse.data` to construct the success MCP `ToolResponse`. * Relay `swiftResponse.messages` as `TextContentItem`s in the MCP response if appropriate. - * For `peekaboo.image` with `input.return_data: true`: + * For `image` with `input.return_data: true`: * Iterate `swiftResponse.data.saved_files.[*].path`. * For each path, read image file into a `Buffer`. * Base64 encode the `Buffer`. @@ -109,14 +111,14 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * Augment successful `ToolResponse` with initial server status string if applicable (see B.6). * Send MCP `ToolResponse`. 
-**Tool 1: `peekaboo.image`** +**Tool 1: `image`** * **MCP Description:** "Captures macOS screen content. Targets: entire screen (each display separately), a specific application window, or all windows of an application. Supports foreground/background capture. Captured image(s) can be saved to file(s) and/or returned directly as image data. Window shadows/frames are automatically excluded. Application identification uses intelligent fuzzy matching." * **MCP Input Schema (`ImageInputSchema`):** ```typescript z.object({ app: z.string().optional().describe("Optional. Target application: name, bundle ID, or partial name. If omitted, captures screen(s). Uses fuzzy matching."), - path: z.string().optional().describe("Optional. Base absolute path for saving. For 'screen' or 'multi' mode, display/window info is appended by backend. If omitted, default temporary paths used by backend. If 'return_data' true, images saved AND returned if 'path' specified."), + path: z.string().optional().describe("Optional. Base absolute path for saving. For 'screen' or 'multi' mode, display/window info is appended by backend. If omitted, the server checks the DEFAULT_SAVE_PATH environment variable. If neither is set, the Swift CLI uses its default temporary paths. If 'return_data' true, images saved AND returned if a path is determined (either from input or ENV)."), mode: z.enum(["screen", "window", "multi"]).optional().describe("Capture mode. Defaults to 'window' if 'app' is provided, otherwise 'screen'."), window_specifier: z.union([ z.object({ title: z.string().describe("Capture window by title.") }), @@ -137,7 +139,7 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * `isError?: boolean` * `_meta?: { backend_error_code?: string }` (For relaying Swift CLI error codes). -**Tool 2: `peekaboo.analyze`** +**Tool 2: `analyze`** * **MCP Description:** "Analyzes an image file using a configured AI model (local Ollama, cloud OpenAI, etc.) 
and returns a textual analysis/answer. Requires image path. AI provider selection and model defaults are governed by the server's `AI_PROVIDERS` environment variable and client overrides." * **MCP Input Schema (`AnalyzeInputSchema`):** @@ -183,7 +185,7 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- * `isError?: boolean` * `_meta?: { backend_error_code?: string }` (For AI provider API errors). -**Tool 3: `peekaboo.list`** +**Tool 3: `list`** * **MCP Description:** "Lists system items: all running applications, windows of a specific app, or server status. Allows specifying window details. App ID uses fuzzy matching." * **MCP Input Schema (`ListInputSchema`):** @@ -408,16 +410,18 @@ https://aistudio.google.com/prompts/1B0Va41QEZz5ZMiGmLl2gDme8kQ-LQPW- } ``` 5. **Required macOS Permissions:** - * **Screen Recording:** Essential for ALL `peekaboo.image` functionalities and for `peekaboo.list` if it needs to read window titles (which it does via `CGWindowListCopyWindowInfo`). Provide clear, step-by-step instructions for System Settings. Include `open "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture"` command. - * **Accessibility:** Required *only* if `peekaboo.image` with `capture_focus: "foreground"` needs to perform specific window raising actions (beyond simple app activation) via the Accessibility API. Explain this nuance. Include `open "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility"` command. + * **Screen Recording:** Essential for ALL `image` functionalities and for `list` if it needs to read window titles (which it does via `CGWindowListCopyWindowInfo`). Provide clear, step-by-step instructions for System Settings. Include `open "x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture"` command. 
+ * **Accessibility:** Required *only* if `image` with `capture_focus: "foreground"` needs to perform specific window raising actions (beyond simple app activation) via the Accessibility API. Explain this nuance. Include `open "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility"` command. 6. **Environment Variables (for Node.js `peekaboo-mcp` server):** - * `AI_PROVIDERS`: Crucial for `peekaboo.analyze`. Explain format (`provider/model,provider/model`), effect, and that `peekaboo.analyze` reports "not configured" if unset. List recognized `provider` names ("ollama", "openai"). + * `AI_PROVIDERS`: Crucial for `analyze`. Explain format (`provider/model,provider/model`), effect, and that `analyze` reports "not configured" if unset. List recognized `provider` names ("ollama", "openai"). * `OPENAI_API_KEY` (and similar for other cloud providers): How they are used. * `OLLAMA_BASE_URL`: Default and purpose. * `LOG_LEVEL`: For `pino` logger. Values and default. - * `PEEKABOO_MCP_CONSOLE_LOGGING`: For development. - * `PEEKABOO_CLI_PATH`: For overriding bundled Swift CLI. + * `LOG_FILE`: Path to the server's log file. Default: `path.join(os.tmpdir(), 'peekaboo-mcp.log')`. + * `DEFAULT_SAVE_PATH`: Default base absolute path for saving images captured by `image` if not specified in the tool input. If this ENV is also not set, the Swift CLI will use its own temporary directory logic. + * `CONSOLE_LOGGING`: For development. + * `CLI_PATH`: For overriding bundled Swift CLI. 7. **MCP Tool Overview:** - * Brief descriptions of `peekaboo.image`, `peekaboo.analyze`, `peekaboo.list` and their primary purpose. + * Brief descriptions of `image`, `analyze`, `list` and their primary purpose. 8. **Link to Detailed Tool Specification:** A separate `TOOL_API_REFERENCE.md` (generated from or summarizing the Zod schemas and output structures in this document) for users/AI developers needing full schema details. 9. 
**Troubleshooting / Support:** Link to GitHub issues. diff --git a/package.json b/package.json index 6262893..0425d9c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@steipete/peekaboo-mcp", - "version": "1.0.0-alpha1", + "version": "1.0.0-beta.1", "description": "A macOS utility exposed via Node.js MCP server for advanced screen captures, image analysis, and window management", "type": "module", "main": "dist/index.js", diff --git a/src/index.ts b/src/index.ts index 73b7cea..a0b6762 100644 --- a/src/index.ts +++ b/src/index.ts @@ -38,30 +38,39 @@ initializeSwiftCliPath(packageRootDir); let hasSentInitialStatus = false; // Initialize logger -const logLevel = process.env.LOG_LEVEL || 'info'; -const logFile = path.join(os.tmpdir(), 'peekaboo-mcp.log'); +const baseLogLevel = process.env.LOG_LEVEL || 'info'; +const logFile = process.env.LOG_FILE || path.join(os.tmpdir(), 'peekaboo-mcp.log'); -// Create logger with file destination by default -const logger = process.env.PEEKABOO_MCP_CONSOLE_LOGGING === 'true' - ? 
pino({ - name: 'peekaboo-mcp', - level: logLevel, - transport: { - target: 'pino-pretty', - options: { - colorize: true, - translateTime: true, - ignore: 'pid,hostname' - } - } - }) - : pino({ - name: 'peekaboo-mcp', - level: logLevel - }, pino.destination({ - dest: logFile, - sync: false - })); +const transportTargets = []; + +// Always add file transport +transportTargets.push({ + level: baseLogLevel, // Explicitly set level for this transport + target: 'pino/file', + options: { + destination: logFile, + mkdir: true // Ensure the directory exists + } +}); + +// Conditional console logging for development +if (process.env.CONSOLE_LOGGING === 'true') { + transportTargets.push({ + level: baseLogLevel, // Explicitly set level for this transport + target: 'pino-pretty', + options: { + destination: 2, // stderr + colorize: true, + translateTime: 'SYS:standard', // More standard time format + ignore: 'pid,hostname' + } + }); +} + +const logger = pino({ + name: 'peekaboo-mcp', + level: baseLogLevel, // Overall minimum level +}, pino.transport({ targets: transportTargets })); // Tool context for handlers const toolContext = { logger }; @@ -137,17 +146,17 @@ server.setRequestHandler(ListToolsRequestSchema, async () => { return { tools: [ { - name: 'peekaboo.image', + name: 'image', description: 'Captures macOS screen content. Targets: entire screen (each display separately), a specific application window, or all windows of an application. Supports foreground/background capture. Captured image(s) can be saved to file(s) and/or returned directly as image data. Window shadows/frames are automatically excluded. Application identification uses intelligent fuzzy matching.', inputSchema: zodToJsonSchema(imageToolSchema) }, { - name: 'peekaboo.analyze', + name: 'analyze', description: 'Analyzes an image file using a configured AI model (local Ollama, cloud OpenAI, etc.) and returns a textual analysis/answer. Requires image path. 
AI provider selection and model defaults are governed by the server\'s `AI_PROVIDERS` environment variable and client overrides.', inputSchema: zodToJsonSchema(analyzeToolSchema) }, { - name: 'peekaboo.list', + name: 'list', description: 'Lists system items: all running applications, windows of a specific app, or server status. Allows specifying window details. App ID uses fuzzy matching.', inputSchema: zodToJsonSchema(listToolSchema) } @@ -164,17 +173,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { try { switch (name) { - case 'peekaboo.image': { + case 'image': { const validatedArgs = imageToolSchema.parse(args || {}); response = await imageToolHandler(validatedArgs, toolContext); break; } - case 'peekaboo.analyze': { + case 'analyze': { const validatedArgs = analyzeToolSchema.parse(args || {}); response = await analyzeToolHandler(validatedArgs, toolContext); break; } - case 'peekaboo.list': { + case 'list': { const validatedArgs = listToolSchema.parse(args || {}); response = await listToolHandler(validatedArgs, toolContext); // Do not augment status for peekaboo.list with item_type: "server_status" diff --git a/src/tools/image.ts b/src/tools/image.ts index 506dbc6..1f3f06a 100644 --- a/src/tools/image.ts +++ b/src/tools/image.ts @@ -120,8 +120,13 @@ export function buildSwiftCliArgs(input: ImageToolInput): string[] { args.push('--app', input.app); } - if (input.path) { - args.push('--path', input.path); + let effectivePath = input.path; + if (!effectivePath && process.env.DEFAULT_SAVE_PATH) { + effectivePath = process.env.DEFAULT_SAVE_PATH; + } + + if (effectivePath) { + args.push('--path', effectivePath); } args.push('--mode', mode); diff --git a/src/utils/swift-cli.ts b/src/utils/swift-cli.ts index e969214..81c80ba 100644 --- a/src/utils/swift-cli.ts +++ b/src/utils/swift-cli.ts @@ -10,7 +10,7 @@ let resolvedCliPath: string | null = null; const INVALID_PATH_SENTINEL = 'PEEKABOO_CLI_PATH_RESOLUTION_FAILED'; function 
determineSwiftCliPath(packageRootDirForFallback?: string): string { - const envPath = process.env.PEEKABOO_CLI_PATH; + const envPath = process.env.CLI_PATH; if (envPath) { try { if (existsSync(envPath)) { @@ -24,14 +24,14 @@ function determineSwiftCliPath(packageRootDirForFallback?: string): string { return path.resolve(packageRootDirForFallback, 'peekaboo'); } - // If neither PEEKABOO_CLI_PATH is valid nor packageRootDirForFallback is provided, + // If neither CLI_PATH is valid nor packageRootDirForFallback is provided, // this is a critical failure in path determination. return INVALID_PATH_SENTINEL; } export function initializeSwiftCliPath(packageRootDir: string): void { if (!packageRootDir) { - // If PEEKABOO_CLI_PATH is also not set or invalid, this will lead to INVALID_PATH_SENTINEL + // If CLI_PATH is also not set or invalid, this will lead to INVALID_PATH_SENTINEL // Allow determineSwiftCliPath to handle this, and the error will be caught by getInitializedSwiftCliPath } resolvedCliPath = determineSwiftCliPath(packageRootDir); @@ -40,7 +40,7 @@ export function initializeSwiftCliPath(packageRootDir: string): void { function getInitializedSwiftCliPath(logger: Logger): string { // Logger is now mandatory if (!resolvedCliPath || resolvedCliPath === INVALID_PATH_SENTINEL) { - const errorMessage = `Peekaboo Swift CLI path is not properly initialized or resolution failed. Resolved path: '${resolvedCliPath}'. Ensure PEEKABOO_CLI_PATH is valid or initializeSwiftCliPath() was called with a correct package root directory at startup.`; + const errorMessage = `Peekaboo Swift CLI path is not properly initialized or resolution failed. Resolved path: '${resolvedCliPath}'. Ensure CLI_PATH is valid or initializeSwiftCliPath() was called with a correct package root directory at startup.`; logger.error(errorMessage); // Throw an error to prevent attempting to use an invalid path throw new Error(errorMessage);