feat: Implement proper frontmost window capture

Adds support for capturing the frontmost window of the frontmost application
instead of falling back to screen capture mode.

Changes:
- Added 'frontmost' case to CaptureMode enum in Swift CLI
- Implemented captureFrontmostWindow() method using NSWorkspace.shared.frontmostApplication
- Updated TypeScript to use --mode frontmost instead of defaulting to screen mode
- Added comprehensive test coverage for frontmost functionality
- Updated existing tests to reflect new behavior

The frontmost mode now:
1. Detects the currently active application
2. Captures only its frontmost window (index 0)
3. Returns a single image file with proper metadata

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Peter Steinberger 2025-06-08 08:42:43 +01:00
parent 34dac65d2a
commit d5b40c1550
8 changed files with 166 additions and 94 deletions

View file

@ -80,6 +80,8 @@ struct ImageCommand: ParsableCommand {
} else { } else {
return try captureScreens() return try captureScreens()
} }
case .frontmost:
return try captureFrontmostWindow()
} }
} }
@ -96,75 +98,8 @@ struct ImageCommand: ParsableCommand {
} }
} }
private func handleError(_ error: Error) { private func handleError(_ error: Error) -> Never {
let captureError: CaptureError = if let err = error as? CaptureError { ImageErrorHandler.handleError(error, jsonOutput: jsonOutput)
err
} else {
.unknownError(error.localizedDescription)
}
// Log the full error details for debugging
Logger.shared.debug("Image capture error: \(error)")
// If it's a CaptureError with an underlying error, log that too
switch captureError {
case let .captureCreationFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying capture creation error: \(underlying)")
}
case let .windowCaptureFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying window capture error: \(underlying)")
}
case let .fileWriteError(_, underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying file write error: \(underlying)")
}
default:
break
}
if jsonOutput {
let code: ErrorCode = switch captureError {
case .screenRecordingPermissionDenied:
.PERMISSION_ERROR_SCREEN_RECORDING
case .accessibilityPermissionDenied:
.PERMISSION_ERROR_ACCESSIBILITY
case .appNotFound:
.APP_NOT_FOUND
case .windowNotFound, .noWindowsFound:
.WINDOW_NOT_FOUND
case .fileWriteError:
.FILE_IO_ERROR
case .invalidArgument:
.INVALID_ARGUMENT
case .unknownError:
.UNKNOWN_ERROR
default:
.CAPTURE_FAILED
}
// Provide additional details for app not found errors
var details: String?
if case .appNotFound = captureError {
let runningApps = NSWorkspace.shared.runningApplications
.filter { $0.activationPolicy == .regular }
.compactMap(\.localizedName)
.sorted()
.joined(separator: ", ")
details = "Available applications: \(runningApps)"
}
outputError(
message: captureError.localizedDescription,
code: code,
details: details ?? "Image capture operation failed"
)
} else {
var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError)
print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream)
}
Foundation.exit(captureError.exitCode)
} }
private func determineMode() -> CaptureMode { private func determineMode() -> CaptureMode {
@ -307,7 +242,10 @@ struct ImageCommand: ParsableCommand {
let searchTerm = windowTitle let searchTerm = windowTitle
let appName = targetApp.localizedName ?? "Unknown" let appName = targetApp.localizedName ?? "Unknown"
Logger.shared.debug("Window not found. Searched for '\(searchTerm)' in \(appName). Available windows: \(availableTitles)") Logger.shared.debug(
"Window not found. Searched for '\(searchTerm)' in \(appName). " +
"Available windows: \(availableTitles)"
)
throw CaptureError.windowTitleNotFound(searchTerm, appName, availableTitles) throw CaptureError.windowTitleNotFound(searchTerm, appName, availableTitles)
} }
@ -502,4 +440,45 @@ struct ImageCommand: ParsableCommand {
throw CaptureError.windowCaptureFailed(error) throw CaptureError.windowCaptureFailed(error)
} }
} }
private func captureFrontmostWindow() throws -> [SavedFile] {
Logger.shared.debug("Capturing frontmost window")
// Get the frontmost (active) application
guard let frontmostApp = NSWorkspace.shared.frontmostApplication else {
throw CaptureError.appNotFound("No frontmost application found")
}
Logger.shared.debug("Frontmost app: \(frontmostApp.localizedName ?? "Unknown")")
// Get windows for the frontmost app
let windows = try WindowManager.getWindowsForApp(pid: frontmostApp.processIdentifier)
guard !windows.isEmpty else {
throw CaptureError.noWindowsFound(frontmostApp.localizedName ?? "frontmost application")
}
// Get the frontmost window (index 0)
let frontmostWindow = windows[0]
Logger.shared.debug("Capturing frontmost window: '\(frontmostWindow.title)'")
// Generate output path
let timestamp = DateFormatter.timestamp.string(from: Date())
let appName = frontmostApp.localizedName ?? "UnknownApp"
let safeName = appName.replacingOccurrences(of: " ", with: "_")
let fileName = "frontmost_\(safeName)_\(timestamp).\(format.rawValue)"
let filePath = OutputPathResolver.getOutputPathWithFallback(basePath: path, fileName: fileName)
// Capture the window
try captureWindow(frontmostWindow, to: filePath)
return [SavedFile(
path: filePath,
item_label: appName,
window_title: frontmostWindow.title,
window_id: UInt32(frontmostWindow.windowId),
window_index: frontmostWindow.windowIndex,
mime_type: format == .png ? "image/png" : "image/jpeg"
)]
}
} }

View file

@ -0,0 +1,75 @@
import Foundation
import AppKit
struct ImageErrorHandler {
static func handleError(_ error: Error, jsonOutput: Bool) -> Never {
let captureError: CaptureError = if let err = error as? CaptureError {
err
} else {
.unknownError(error.localizedDescription)
}
// Log the full error details for debugging
Logger.shared.debug("Image capture error: \(error)")
// If it's a CaptureError with an underlying error, log that too
switch captureError {
case let .captureCreationFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying capture creation error: \(underlying)")
}
case let .windowCaptureFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying window capture error: \(underlying)")
}
case let .fileWriteError(_, underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying file write error: \(underlying)")
}
default:
break
}
if jsonOutput {
let code: ErrorCode = switch captureError {
case .screenRecordingPermissionDenied:
.PERMISSION_ERROR_SCREEN_RECORDING
case .accessibilityPermissionDenied:
.PERMISSION_ERROR_ACCESSIBILITY
case .appNotFound:
.APP_NOT_FOUND
case .windowNotFound, .noWindowsFound:
.WINDOW_NOT_FOUND
case .fileWriteError:
.FILE_IO_ERROR
case .invalidArgument:
.INVALID_ARGUMENT
case .unknownError:
.UNKNOWN_ERROR
default:
.CAPTURE_FAILED
}
// Provide additional details for app not found errors
var details: String?
if case .appNotFound = captureError {
let runningApps = NSWorkspace.shared.runningApplications
.filter { $0.activationPolicy == .regular }
.compactMap(\.localizedName)
.sorted()
.joined(separator: ", ")
details = "Available applications: \(runningApps)"
}
outputError(
message: captureError.localizedDescription,
code: code,
details: details ?? "Image capture operation failed"
)
} else {
var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError)
print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream)
}
Foundation.exit(captureError.exitCode)
}
}

View file

@ -20,6 +20,7 @@ enum CaptureMode: String, CaseIterable, ExpressibleByArgument {
case screen case screen
case window case window
case multi case multi
case frontmost
} }
enum ImageFormat: String, CaseIterable, ExpressibleByArgument { enum ImageFormat: String, CaseIterable, ExpressibleByArgument {

View file

@ -129,11 +129,9 @@ struct OutputPathResolver {
let sensitivePathPrefixes = ["/etc/", "/usr/", "/bin/", "/sbin/", "/System/", "/Library/System/"] let sensitivePathPrefixes = ["/etc/", "/usr/", "/bin/", "/sbin/", "/System/", "/Library/System/"]
let normalizedPath = (path as NSString).standardizingPath let normalizedPath = (path as NSString).standardizingPath
for prefix in sensitivePathPrefixes { for prefix in sensitivePathPrefixes where normalizedPath.hasPrefix(prefix) {
if normalizedPath.hasPrefix(prefix) { Logger.shared.debug("Path points to system directory: \(path) -> \(normalizedPath)")
Logger.shared.debug("Path points to system directory: \(path) -> \(normalizedPath)") break
break
}
} }
} }
} }

View file

@ -1,4 +1,4 @@
// This file is auto-generated by the build script. Do not edit manually. // This file is auto-generated by the build script. Do not edit manually.
enum Version { enum Version {
static let current = "1.0.0-beta.20" static let current = "1.0.0-beta.21"
} }

View file

@ -82,11 +82,10 @@ export function buildSwiftCliArgs(
args.push("--mode", "screen", "--screen-index", screenIndex.toString()); args.push("--mode", "screen", "--screen-index", screenIndex.toString());
} }
} else if (input.app_target.toLowerCase() === "frontmost") { } else if (input.app_target.toLowerCase() === "frontmost") {
// 'frontmost': All windows of the frontmost app // 'frontmost': Capture the frontmost window of the frontmost app
log.warn( // This requires special handling to first find the frontmost app, then capture its frontmost window
"'frontmost' target requires determining current frontmost app, defaulting to screen mode", log.debug("Using frontmost mode - will attempt to capture frontmost window");
); args.push("--mode", "frontmost");
args.push("--mode", "screen");
} else if (input.app_target.includes(":")) { } else if (input.app_target.includes(":")) {
// 'AppName:WINDOW_TITLE:Title' or 'AppName:WINDOW_INDEX:Index' // 'AppName:WINDOW_TITLE:Title' or 'AppName:WINDOW_INDEX:Index'
const parts = input.app_target.split(":"); const parts = input.app_target.split(":");

View file

@ -94,6 +94,26 @@ export const mockSwiftCli = {
}; };
}, },
// Mock frontmost window capture response
captureFrontmostWindow(): SwiftCliResponse {
return {
success: true,
data: {
saved_files: [
{
path: "/tmp/frontmost_Safari_20250608_083000.png",
item_label: "Safari",
window_title: "Example Website - Safari",
window_id: 12345,
window_index: 0,
mime_type: "image/png",
},
],
} as ImageCaptureData,
messages: [],
};
},
// Mock error responses // Mock error responses
permissionDenied(): SwiftCliResponse { permissionDenied(): SwiftCliResponse {
return { return {

View file

@ -383,28 +383,28 @@ describe("Image Tool", () => {
); );
}); });
it("should handle app_target: 'frontmost' with warning", async () => { it("should handle app_target: 'frontmost' with new frontmost mode", async () => {
// Mock resolveImagePath for minimal case // Mock resolveImagePath for minimal case
mockResolveImagePath.mockResolvedValue({ mockResolveImagePath.mockResolvedValue({
effectivePath: MOCK_TEMP_IMAGE_DIR, effectivePath: MOCK_TEMP_IMAGE_DIR,
tempDirUsed: MOCK_TEMP_IMAGE_DIR, tempDirUsed: MOCK_TEMP_IMAGE_DIR,
}); });
const mockResponse = mockSwiftCli.captureImage("screen", {}); const mockResponse = mockSwiftCli.captureFrontmostWindow();
mockExecuteSwiftCli.mockResolvedValue(mockResponse); mockExecuteSwiftCli.mockResolvedValue(mockResponse);
const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
await imageToolHandler( await imageToolHandler(
{ app_target: "frontmost" }, { app_target: "frontmost" },
mockContext, mockContext,
); );
expect(loggerWarnSpy).toHaveBeenCalledWith( expect(loggerDebugSpy).toHaveBeenCalledWith(
"'frontmost' target requires determining current frontmost app, defaulting to screen mode", "Using frontmost mode - will attempt to capture frontmost window",
); );
expect(mockExecuteSwiftCli).toHaveBeenCalledWith( expect(mockExecuteSwiftCli).toHaveBeenCalledWith(
expect.arrayContaining(["--mode", "screen"]), expect.arrayContaining(["--mode", "frontmost"]),
mockLogger, mockLogger,
expect.objectContaining({ timeout: expect.any(Number) }) expect.objectContaining({ timeout: expect.any(Number) })
); );
@ -1035,33 +1035,33 @@ describe("Image Tool", () => {
}); });
it("should handle app_target: 'frontmost'", () => { it("should handle app_target: 'frontmost'", () => {
const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
const args = buildSwiftCliArgs({ app_target: "frontmost" }, undefined, undefined, mockLogger); const args = buildSwiftCliArgs({ app_target: "frontmost" }, undefined, undefined, mockLogger);
expect(args).toEqual( expect(args).toEqual(
expect.arrayContaining(["--mode", "screen"]), expect.arrayContaining(["--mode", "frontmost"]),
); );
expect(args).not.toContain("--app"); expect(args).not.toContain("--app");
expect(loggerWarnSpy).toHaveBeenCalled(); expect(loggerDebugSpy).toHaveBeenCalledWith("Using frontmost mode - will attempt to capture frontmost window");
}); });
it("should handle app_target: 'frontmost' case-insensitively", () => { it("should handle app_target: 'frontmost' case-insensitively", () => {
const loggerWarnSpy = vi.spyOn(mockLogger, "warn"); const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
// Test uppercase // Test uppercase
const argsUpper = buildSwiftCliArgs({ app_target: "FRONTMOST" }, undefined, undefined, mockLogger); const argsUpper = buildSwiftCliArgs({ app_target: "FRONTMOST" }, undefined, undefined, mockLogger);
expect(argsUpper).toEqual( expect(argsUpper).toEqual(
expect.arrayContaining(["--mode", "screen"]), expect.arrayContaining(["--mode", "frontmost"]),
); );
expect(argsUpper).not.toContain("--app"); expect(argsUpper).not.toContain("--app");
// Test mixed case // Test mixed case
const argsMixed = buildSwiftCliArgs({ app_target: "Frontmost" }, undefined, undefined, mockLogger); const argsMixed = buildSwiftCliArgs({ app_target: "Frontmost" }, undefined, undefined, mockLogger);
expect(argsMixed).toEqual( expect(argsMixed).toEqual(
expect.arrayContaining(["--mode", "screen"]), expect.arrayContaining(["--mode", "frontmost"]),
); );
expect(argsMixed).not.toContain("--app"); expect(argsMixed).not.toContain("--app");
expect(loggerWarnSpy).toHaveBeenCalledTimes(2); expect(loggerDebugSpy).toHaveBeenCalledTimes(2);
}); });
it("should handle window specifiers case-insensitively", () => { it("should handle window specifiers case-insensitively", () => {