feat: Implement proper frontmost window capture

Adds support for capturing the frontmost window of the frontmost application
instead of falling back to screen capture mode.

Changes:
- Added 'frontmost' case to CaptureMode enum in Swift CLI
- Implemented captureFrontmostWindow() method using NSWorkspace.shared.frontmostApplication
- Updated TypeScript to use --mode frontmost instead of defaulting to screen mode
- Added comprehensive test coverage for frontmost functionality
- Updated existing tests to reflect new behavior

The frontmost mode now:
1. Detects the currently active application
2. Captures only its frontmost window (index 0)
3. Returns a single image file with proper metadata

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Peter Steinberger 2025-06-08 08:42:43 +01:00
parent 34dac65d2a
commit d5b40c1550
8 changed files with 166 additions and 94 deletions

View file

@ -80,6 +80,8 @@ struct ImageCommand: ParsableCommand {
} else {
return try captureScreens()
}
case .frontmost:
return try captureFrontmostWindow()
}
}
@ -96,75 +98,8 @@ struct ImageCommand: ParsableCommand {
}
}
private func handleError(_ error: Error) {
let captureError: CaptureError = if let err = error as? CaptureError {
err
} else {
.unknownError(error.localizedDescription)
}
// Log the full error details for debugging
Logger.shared.debug("Image capture error: \(error)")
// If it's a CaptureError with an underlying error, log that too
switch captureError {
case let .captureCreationFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying capture creation error: \(underlying)")
}
case let .windowCaptureFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying window capture error: \(underlying)")
}
case let .fileWriteError(_, underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying file write error: \(underlying)")
}
default:
break
}
if jsonOutput {
let code: ErrorCode = switch captureError {
case .screenRecordingPermissionDenied:
.PERMISSION_ERROR_SCREEN_RECORDING
case .accessibilityPermissionDenied:
.PERMISSION_ERROR_ACCESSIBILITY
case .appNotFound:
.APP_NOT_FOUND
case .windowNotFound, .noWindowsFound:
.WINDOW_NOT_FOUND
case .fileWriteError:
.FILE_IO_ERROR
case .invalidArgument:
.INVALID_ARGUMENT
case .unknownError:
.UNKNOWN_ERROR
default:
.CAPTURE_FAILED
}
// Provide additional details for app not found errors
var details: String?
if case .appNotFound = captureError {
let runningApps = NSWorkspace.shared.runningApplications
.filter { $0.activationPolicy == .regular }
.compactMap(\.localizedName)
.sorted()
.joined(separator: ", ")
details = "Available applications: \(runningApps)"
}
outputError(
message: captureError.localizedDescription,
code: code,
details: details ?? "Image capture operation failed"
)
} else {
var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError)
print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream)
}
Foundation.exit(captureError.exitCode)
private func handleError(_ error: Error) -> Never {
ImageErrorHandler.handleError(error, jsonOutput: jsonOutput)
}
private func determineMode() -> CaptureMode {
@ -307,7 +242,10 @@ struct ImageCommand: ParsableCommand {
let searchTerm = windowTitle
let appName = targetApp.localizedName ?? "Unknown"
Logger.shared.debug("Window not found. Searched for '\(searchTerm)' in \(appName). Available windows: \(availableTitles)")
Logger.shared.debug(
"Window not found. Searched for '\(searchTerm)' in \(appName). " +
"Available windows: \(availableTitles)"
)
throw CaptureError.windowTitleNotFound(searchTerm, appName, availableTitles)
}
@ -502,4 +440,45 @@ struct ImageCommand: ParsableCommand {
throw CaptureError.windowCaptureFailed(error)
}
}
private func captureFrontmostWindow() throws -> [SavedFile] {
Logger.shared.debug("Capturing frontmost window")
// Get the frontmost (active) application
guard let frontmostApp = NSWorkspace.shared.frontmostApplication else {
throw CaptureError.appNotFound("No frontmost application found")
}
Logger.shared.debug("Frontmost app: \(frontmostApp.localizedName ?? "Unknown")")
// Get windows for the frontmost app
let windows = try WindowManager.getWindowsForApp(pid: frontmostApp.processIdentifier)
guard !windows.isEmpty else {
throw CaptureError.noWindowsFound(frontmostApp.localizedName ?? "frontmost application")
}
// Get the frontmost window (index 0)
let frontmostWindow = windows[0]
Logger.shared.debug("Capturing frontmost window: '\(frontmostWindow.title)'")
// Generate output path
let timestamp = DateFormatter.timestamp.string(from: Date())
let appName = frontmostApp.localizedName ?? "UnknownApp"
let safeName = appName.replacingOccurrences(of: " ", with: "_")
let fileName = "frontmost_\(safeName)_\(timestamp).\(format.rawValue)"
let filePath = OutputPathResolver.getOutputPathWithFallback(basePath: path, fileName: fileName)
// Capture the window
try captureWindow(frontmostWindow, to: filePath)
return [SavedFile(
path: filePath,
item_label: appName,
window_title: frontmostWindow.title,
window_id: UInt32(frontmostWindow.windowId),
window_index: frontmostWindow.windowIndex,
mime_type: format == .png ? "image/png" : "image/jpeg"
)]
}
}

View file

@ -0,0 +1,75 @@
import Foundation
import AppKit
struct ImageErrorHandler {
static func handleError(_ error: Error, jsonOutput: Bool) -> Never {
let captureError: CaptureError = if let err = error as? CaptureError {
err
} else {
.unknownError(error.localizedDescription)
}
// Log the full error details for debugging
Logger.shared.debug("Image capture error: \(error)")
// If it's a CaptureError with an underlying error, log that too
switch captureError {
case let .captureCreationFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying capture creation error: \(underlying)")
}
case let .windowCaptureFailed(underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying window capture error: \(underlying)")
}
case let .fileWriteError(_, underlyingError):
if let underlying = underlyingError {
Logger.shared.debug("Underlying file write error: \(underlying)")
}
default:
break
}
if jsonOutput {
let code: ErrorCode = switch captureError {
case .screenRecordingPermissionDenied:
.PERMISSION_ERROR_SCREEN_RECORDING
case .accessibilityPermissionDenied:
.PERMISSION_ERROR_ACCESSIBILITY
case .appNotFound:
.APP_NOT_FOUND
case .windowNotFound, .noWindowsFound:
.WINDOW_NOT_FOUND
case .fileWriteError:
.FILE_IO_ERROR
case .invalidArgument:
.INVALID_ARGUMENT
case .unknownError:
.UNKNOWN_ERROR
default:
.CAPTURE_FAILED
}
// Provide additional details for app not found errors
var details: String?
if case .appNotFound = captureError {
let runningApps = NSWorkspace.shared.runningApplications
.filter { $0.activationPolicy == .regular }
.compactMap(\.localizedName)
.sorted()
.joined(separator: ", ")
details = "Available applications: \(runningApps)"
}
outputError(
message: captureError.localizedDescription,
code: code,
details: details ?? "Image capture operation failed"
)
} else {
var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError)
print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream)
}
Foundation.exit(captureError.exitCode)
}
}

View file

@ -20,6 +20,7 @@ enum CaptureMode: String, CaseIterable, ExpressibleByArgument {
case screen
case window
case multi
case frontmost
}
enum ImageFormat: String, CaseIterable, ExpressibleByArgument {

View file

@ -129,11 +129,9 @@ struct OutputPathResolver {
let sensitivePathPrefixes = ["/etc/", "/usr/", "/bin/", "/sbin/", "/System/", "/Library/System/"]
let normalizedPath = (path as NSString).standardizingPath
for prefix in sensitivePathPrefixes {
if normalizedPath.hasPrefix(prefix) {
for prefix in sensitivePathPrefixes where normalizedPath.hasPrefix(prefix) {
Logger.shared.debug("Path points to system directory: \(path) -> \(normalizedPath)")
break
}
}
}
}

View file

@ -1,4 +1,4 @@
// This file is auto-generated by the build script. Do not edit manually.
enum Version {
static let current = "1.0.0-beta.20"
static let current = "1.0.0-beta.21"
}

View file

@ -82,11 +82,10 @@ export function buildSwiftCliArgs(
args.push("--mode", "screen", "--screen-index", screenIndex.toString());
}
} else if (input.app_target.toLowerCase() === "frontmost") {
// 'frontmost': All windows of the frontmost app
log.warn(
"'frontmost' target requires determining current frontmost app, defaulting to screen mode",
);
args.push("--mode", "screen");
// 'frontmost': Capture the frontmost window of the frontmost app
// This requires special handling to first find the frontmost app, then capture its frontmost window
log.debug("Using frontmost mode - will attempt to capture frontmost window");
args.push("--mode", "frontmost");
} else if (input.app_target.includes(":")) {
// 'AppName:WINDOW_TITLE:Title' or 'AppName:WINDOW_INDEX:Index'
const parts = input.app_target.split(":");

View file

@ -94,6 +94,26 @@ export const mockSwiftCli = {
};
},
// Mock frontmost window capture response
captureFrontmostWindow(): SwiftCliResponse {
return {
success: true,
data: {
saved_files: [
{
path: "/tmp/frontmost_Safari_20250608_083000.png",
item_label: "Safari",
window_title: "Example Website - Safari",
window_id: 12345,
window_index: 0,
mime_type: "image/png",
},
],
} as ImageCaptureData,
messages: [],
};
},
// Mock error responses
permissionDenied(): SwiftCliResponse {
return {

View file

@ -383,28 +383,28 @@ describe("Image Tool", () => {
);
});
it("should handle app_target: 'frontmost' with warning", async () => {
it("should handle app_target: 'frontmost' with new frontmost mode", async () => {
// Mock resolveImagePath for minimal case
mockResolveImagePath.mockResolvedValue({
effectivePath: MOCK_TEMP_IMAGE_DIR,
tempDirUsed: MOCK_TEMP_IMAGE_DIR,
});
const mockResponse = mockSwiftCli.captureImage("screen", {});
const mockResponse = mockSwiftCli.captureFrontmostWindow();
mockExecuteSwiftCli.mockResolvedValue(mockResponse);
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
await imageToolHandler(
{ app_target: "frontmost" },
mockContext,
);
expect(loggerWarnSpy).toHaveBeenCalledWith(
"'frontmost' target requires determining current frontmost app, defaulting to screen mode",
expect(loggerDebugSpy).toHaveBeenCalledWith(
"Using frontmost mode - will attempt to capture frontmost window",
);
expect(mockExecuteSwiftCli).toHaveBeenCalledWith(
expect.arrayContaining(["--mode", "screen"]),
expect.arrayContaining(["--mode", "frontmost"]),
mockLogger,
expect.objectContaining({ timeout: expect.any(Number) })
);
@ -1035,33 +1035,33 @@ describe("Image Tool", () => {
});
it("should handle app_target: 'frontmost'", () => {
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
const args = buildSwiftCliArgs({ app_target: "frontmost" }, undefined, undefined, mockLogger);
expect(args).toEqual(
expect.arrayContaining(["--mode", "screen"]),
expect.arrayContaining(["--mode", "frontmost"]),
);
expect(args).not.toContain("--app");
expect(loggerWarnSpy).toHaveBeenCalled();
expect(loggerDebugSpy).toHaveBeenCalledWith("Using frontmost mode - will attempt to capture frontmost window");
});
it("should handle app_target: 'frontmost' case-insensitively", () => {
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
const loggerDebugSpy = vi.spyOn(mockLogger, "debug");
// Test uppercase
const argsUpper = buildSwiftCliArgs({ app_target: "FRONTMOST" }, undefined, undefined, mockLogger);
expect(argsUpper).toEqual(
expect.arrayContaining(["--mode", "screen"]),
expect.arrayContaining(["--mode", "frontmost"]),
);
expect(argsUpper).not.toContain("--app");
// Test mixed case
const argsMixed = buildSwiftCliArgs({ app_target: "Frontmost" }, undefined, undefined, mockLogger);
expect(argsMixed).toEqual(
expect.arrayContaining(["--mode", "screen"]),
expect.arrayContaining(["--mode", "frontmost"]),
);
expect(argsMixed).not.toContain("--app");
expect(loggerWarnSpy).toHaveBeenCalledTimes(2);
expect(loggerDebugSpy).toHaveBeenCalledTimes(2);
});
it("should handle window specifiers case-insensitively", () => {