Peekaboo/peekaboo-cli/Sources/peekaboo/ImageCommand.swift
Peter Steinberger dd680eb638 feat: Improve window title matching and error messages for URLs with ports
When users search for windows with URLs containing ports (e.g., 'http://example.com:8080'),
the system now provides much better debugging information when the window isn't found.

Key improvements:
- Enhanced window not found errors now list all available window titles
- Added specific guidance for URL-based searches (try without protocol)
- New CaptureError.windowTitleNotFound with detailed debugging info
- Comprehensive test coverage for colon parsing in app targets
- Better error messages help users understand why matching failed

Example improved error:
"Window with title containing 'http://example.com:8080' not found in Google Chrome.
Available windows: 'example.com:8080 - Google Chrome', 'New Tab - Google Chrome'.
Note: For URLs, try without the protocol (e.g., 'example.com:8080' instead of 'http://example.com:8080')."

This addresses the common issue where browsers display simplified URLs in window titles
without the protocol, making it easier for users to find the correct matching pattern.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-08 08:09:47 +01:00

505 lines
18 KiB
Swift

import AppKit
import ArgumentParser
import CoreGraphics
import Foundation
import ScreenCaptureKit
import UniformTypeIdentifiers
/// Adapter that lets `print(_:to:)` write to a `FileHandle` (for example standard error).
struct FileHandleTextOutputStream: TextOutputStream {
    /// Handle that receives every chunk written to this stream.
    private let fileHandle: FileHandle

    /// Wraps `fileHandle`; the handle is stored as-is and is not closed by this type.
    init(_ fileHandle: FileHandle) {
        self.fileHandle = fileHandle
    }

    /// Encodes `string` as UTF-8 and writes the resulting bytes to the wrapped handle.
    mutating func write(_ string: String) {
        // A Swift String always has a valid UTF-8 view, so no failure path is needed.
        fileHandle.write(Data(string.utf8))
    }
}
/// `image` subcommand: captures displays or application windows into image files.
///
/// The capture mode is either given explicitly via `--mode` or derived from the
/// arguments: `.window` when `--app` is present, `.screen` otherwise. Results are
/// reported as plain text or, with `--json-output`, through `outputSuccess` /
/// `outputError`. Any failure terminates the process with the error's exit code.
struct ImageCommand: ParsableCommand {
    static let configuration = CommandConfiguration(
        commandName: "image",
        abstract: "Capture screen or window images"
    )

    // MARK: - Command-line arguments

    /// Identifier handed to `ApplicationFinder.findApplication(identifier:)`.
    @Option(name: .long, help: "Target application identifier")
    var app: String?

    /// Base path forwarded to `OutputPathResolver` when building output file paths.
    @Option(name: .long, help: "Base output path for saved images")
    var path: String?

    /// Explicit capture mode; when `nil` the mode is derived in `determineMode()`.
    @Option(name: .long, help: "Capture mode")
    var mode: CaptureMode?

    /// Substring that the target window's title must contain (case-sensitive `contains`).
    @Option(name: .long, help: "Window title to capture")
    var windowTitle: String?

    /// Position in the application's window list; only consulted when no title is given.
    @Option(name: .long, help: "Window index to capture (0=frontmost)")
    var windowIndex: Int?

    /// Position in the active display list; out-of-range values fall back to all screens.
    @Option(name: .long, help: "Screen index to capture (0-based)")
    var screenIndex: Int?

    @Option(name: .long, help: "Image format")
    var format: ImageFormat = .png

    @Option(name: .long, help: "Capture focus behavior")
    var captureFocus: CaptureFocus = .auto

    @Flag(name: .long, help: "Output results in JSON format")
    var jsonOutput = false

    /// MIME type reported for every saved file, derived from the selected `format`.
    private var mimeType: String {
        format == .png ? "image/png" : "image/jpeg"
    }

    // MARK: - Entry point

    /// Checks the screen-recording permission, performs the capture and prints the result.
    /// Every thrown error is routed to `handleError(_:)`, which exits the process.
    func run() {
        Logger.shared.setJsonOutputMode(jsonOutput)
        do {
            try PermissionsChecker.requireScreenRecordingPermission()
            let savedFiles = try performCapture()
            outputResults(savedFiles)
        } catch {
            handleError(error)
        }
    }

    /// Dispatches to the capture routine that matches the resolved mode.
    ///
    /// - Throws: `CaptureError.appNotFound` when `.window` mode is requested without
    ///   `--app`, plus whatever the selected capture routine throws.
    private func performCapture() throws -> [SavedFile] {
        switch determineMode() {
        case .screen:
            return try captureScreens()
        case .window:
            guard let app else {
                throw CaptureError.appNotFound("No application specified for window capture")
            }
            return try captureApplicationWindow(app)
        case .multi:
            // Without an application, "multi" means every screen.
            guard let app else {
                return try captureScreens()
            }
            return try captureAllApplicationWindows(app)
        }
    }

    // MARK: - Reporting

    /// Prints the saved files, either as JSON or as a human-readable list.
    private func outputResults(_ savedFiles: [SavedFile]) {
        let data = ImageCaptureData(saved_files: savedFiles)
        guard !jsonOutput else {
            outputSuccess(data: data)
            return
        }
        print("Captured \(savedFiles.count) image(s):")
        for file in savedFiles {
            print(" \(file.path)")
        }
    }

    /// Logs `error`, reports it (JSON or standard error) and exits with the error's exit code.
    ///
    /// Errors that are not a `CaptureError` are wrapped in `.unknownError`.
    private func handleError(_ error: Error) {
        let captureError: CaptureError = (error as? CaptureError) ?? .unknownError(error.localizedDescription)

        // Log the full error details for debugging.
        Logger.shared.debug("Image capture error: \(error)")
        logUnderlyingError(of: captureError)

        if jsonOutput {
            // Only "app not found" carries extra details: the list of running applications.
            var details: String?
            if case .appNotFound = captureError {
                let runningApps = NSWorkspace.shared.runningApplications
                    .filter { $0.activationPolicy == .regular }
                    .compactMap(\.localizedName)
                    .sorted()
                    .joined(separator: ", ")
                details = "Available applications: \(runningApps)"
            }
            outputError(
                message: captureError.localizedDescription,
                code: errorCode(for: captureError),
                details: details ?? "Image capture operation failed"
            )
        } else {
            var localStandardErrorStream = FileHandleTextOutputStream(FileHandle.standardError)
            print("Error: \(captureError.localizedDescription)", to: &localStandardErrorStream)
        }
        Foundation.exit(captureError.exitCode)
    }

    /// Emits a debug log line for the wrapped error of those cases that carry one.
    private func logUnderlyingError(of captureError: CaptureError) {
        switch captureError {
        case .captureCreationFailed(let underlying?):
            Logger.shared.debug("Underlying capture creation error: \(underlying)")
        case .windowCaptureFailed(let underlying?):
            Logger.shared.debug("Underlying window capture error: \(underlying)")
        case .fileWriteError(_, let underlying?):
            Logger.shared.debug("Underlying file write error: \(underlying)")
        default:
            break
        }
    }

    /// Maps a capture error to the code reported in JSON output.
    ///
    /// A title mismatch (`.windowTitleNotFound`) is reported as `WINDOW_NOT_FOUND`,
    /// in line with the other "no such window" cases, instead of the generic
    /// `CAPTURE_FAILED` fallback.
    private func errorCode(for captureError: CaptureError) -> ErrorCode {
        switch captureError {
        case .screenRecordingPermissionDenied:
            return .PERMISSION_ERROR_SCREEN_RECORDING
        case .accessibilityPermissionDenied:
            return .PERMISSION_ERROR_ACCESSIBILITY
        case .appNotFound:
            return .APP_NOT_FOUND
        case .windowNotFound, .noWindowsFound, .windowTitleNotFound:
            return .WINDOW_NOT_FOUND
        case .fileWriteError:
            return .FILE_IO_ERROR
        case .invalidArgument:
            return .INVALID_ARGUMENT
        case .unknownError:
            return .UNKNOWN_ERROR
        default:
            return .CAPTURE_FAILED
        }
    }

    // MARK: - Mode resolution

    /// Returns the explicit `--mode` if given; otherwise `.window` when an app was
    /// specified and `.screen` when it was not.
    private func determineMode() -> CaptureMode {
        if let mode {
            return mode
        }
        return app == nil ? .screen : .window
    }

    // MARK: - Screen capture

    /// Captures either the display selected by `--screen-index` or every active display.
    private func captureScreens() throws(CaptureError) -> [SavedFile] {
        let displays = try getActiveDisplays()
        guard let screenIndex else {
            return try captureAllScreens(displays: displays)
        }
        return try captureSpecificScreen(displays: displays, screenIndex: screenIndex)
    }

    /// Queries Core Graphics for the IDs of all active displays.
    ///
    /// The list is fetched in two steps (count, then IDs). The second call reports
    /// how many entries it actually filled; the result is trimmed to that number so
    /// a display that disappears between the two calls cannot leave placeholder
    /// `0` IDs in the returned array.
    ///
    /// - Throws: `CaptureError.noDisplaysAvailable` if either query fails or no
    ///   display is reported.
    private func getActiveDisplays() throws(CaptureError) -> [CGDirectDisplayID] {
        var displayCount: UInt32 = 0
        guard CGGetActiveDisplayList(0, nil, &displayCount) == .success, displayCount > 0 else {
            throw CaptureError.noDisplaysAvailable
        }

        var displays = [CGDirectDisplayID](repeating: 0, count: Int(displayCount))
        var filledCount: UInt32 = 0
        guard CGGetActiveDisplayList(displayCount, &displays, &filledCount) == .success, filledCount > 0 else {
            throw CaptureError.noDisplaysAvailable
        }
        return Array(displays.prefix(Int(filledCount)))
    }

    /// Captures the display at `screenIndex`, or all displays when the index is out of range.
    private func captureSpecificScreen(
        displays: [CGDirectDisplayID],
        screenIndex: Int
    ) throws(CaptureError) -> [SavedFile] {
        guard displays.indices.contains(screenIndex) else {
            Logger.shared.debug("Screen index \(screenIndex) is out of bounds. Capturing all screens instead.")
            // When falling back to all screens, use fallback-aware capture to prevent filename conflicts.
            return try captureAllScreensWithFallback(displays: displays)
        }
        let labelSuffix = " (Index \(screenIndex))"
        return try [
            captureSingleDisplay(displayID: displays[screenIndex], index: screenIndex, labelSuffix: labelSuffix)
        ]
    }

    /// Captures every display in order, resolving paths with `OutputPathResolver.getOutputPath`.
    private func captureAllScreens(displays: [CGDirectDisplayID]) throws(CaptureError) -> [SavedFile] {
        var savedFiles: [SavedFile] = []
        for (index, displayID) in displays.enumerated() {
            try savedFiles.append(captureSingleDisplay(displayID: displayID, index: index, labelSuffix: ""))
        }
        return savedFiles
    }

    /// Captures every display in order, resolving paths with
    /// `OutputPathResolver.getOutputPathWithFallback`.
    private func captureAllScreensWithFallback(displays: [CGDirectDisplayID]) throws(CaptureError) -> [SavedFile] {
        var savedFiles: [SavedFile] = []
        for (index, displayID) in displays.enumerated() {
            try savedFiles.append(
                captureSingleDisplayWithFallback(displayID: displayID, index: index, labelSuffix: "")
            )
        }
        return savedFiles
    }

    /// Captures one display to a path produced by `OutputPathResolver.getOutputPath`.
    private func captureSingleDisplay(
        displayID: CGDirectDisplayID,
        index: Int,
        labelSuffix: String
    ) throws(CaptureError) -> SavedFile {
        let fileName = FileNameGenerator.generateFileName(displayIndex: index, format: format)
        let filePath = OutputPathResolver.getOutputPath(basePath: path, fileName: fileName)
        return try captureDisplayImage(displayID: displayID, index: index, labelSuffix: labelSuffix, filePath: filePath)
    }

    /// Captures one display to a path produced by `OutputPathResolver.getOutputPathWithFallback`.
    private func captureSingleDisplayWithFallback(
        displayID: CGDirectDisplayID,
        index: Int,
        labelSuffix: String
    ) throws(CaptureError) -> SavedFile {
        let fileName = FileNameGenerator.generateFileName(displayIndex: index, format: format)
        let filePath = OutputPathResolver.getOutputPathWithFallback(basePath: path, fileName: fileName)
        return try captureDisplayImage(displayID: displayID, index: index, labelSuffix: labelSuffix, filePath: filePath)
    }

    /// Shared tail of the two single-display variants: captures to `filePath` and
    /// builds the matching `SavedFile` (labelled with the 1-based display number).
    private func captureDisplayImage(
        displayID: CGDirectDisplayID,
        index: Int,
        labelSuffix: String,
        filePath: String
    ) throws(CaptureError) -> SavedFile {
        try captureDisplay(displayID, to: filePath)
        return SavedFile(
            path: filePath,
            item_label: "Display \(index + 1)\(labelSuffix)",
            window_title: nil,
            window_id: nil,
            window_index: nil,
            mime_type: mimeType
        )
    }

    // MARK: - Window capture

    /// Captures a single window of the application matching `appIdentifier`.
    ///
    /// The window is chosen by `--window-title`, then `--window-index`, then the
    /// first window in the list. If the identifier matches several applications,
    /// all windows of all matches are captured instead.
    ///
    /// - Throws: `CaptureError.appNotFound`, `.noWindowsFound`, `.windowTitleNotFound`,
    ///   `.invalidWindowIndex`, or any error from the permission check, window lookup
    ///   or capture.
    private func captureApplicationWindow(_ appIdentifier: String) throws -> [SavedFile] {
        let targetApp: NSRunningApplication
        do {
            targetApp = try ApplicationFinder.findApplication(identifier: appIdentifier)
        } catch let ApplicationError.notFound(identifier) {
            throw CaptureError.appNotFound(identifier)
        } catch let ApplicationError.ambiguous(identifier, matches) {
            // For ambiguous matches, capture all windows from all matching applications.
            Logger.shared.debug("Multiple applications match '\(identifier)', capturing all windows from all matches")
            return try captureWindowsFromMultipleApps(matches, appIdentifier: identifier)
        }

        try activateIfNeeded(targetApp)

        let windows = try WindowManager.getWindowsForApp(pid: targetApp.processIdentifier)
        guard !windows.isEmpty else {
            throw CaptureError.noWindowsFound(targetApp.localizedName ?? appIdentifier)
        }

        let targetWindow = try selectTargetWindow(from: windows, of: targetApp)
        let fileName = FileNameGenerator.generateFileName(
            appName: targetApp.localizedName, windowTitle: targetWindow.title, format: format
        )
        let filePath = OutputPathResolver.getOutputPath(basePath: path, fileName: fileName)
        try captureWindow(targetWindow, to: filePath)

        return [
            SavedFile(
                path: filePath,
                item_label: targetApp.localizedName,
                window_title: targetWindow.title,
                window_id: targetWindow.windowId,
                window_index: targetWindow.windowIndex,
                mime_type: mimeType
            )
        ]
    }

    /// Picks the window to capture from a non-empty window list.
    ///
    /// - Precondition: `windows` is not empty (the caller checks this).
    /// - Throws: `CaptureError.windowTitleNotFound` with the quoted titles of all
    ///   available windows when no title contains `windowTitle`;
    ///   `CaptureError.invalidWindowIndex` when `windowIndex` is out of range.
    private func selectTargetWindow(
        from windows: [WindowData],
        of targetApp: NSRunningApplication
    ) throws -> WindowData {
        if let windowTitle {
            if let match = windows.first(where: { $0.title.contains(windowTitle) }) {
                return match
            }
            // Create detailed error message with available window titles for debugging.
            let availableTitles = windows.map { "\"\($0.title)\"" }.joined(separator: ", ")
            let searchTerm = windowTitle
            let appName = targetApp.localizedName ?? "Unknown"
            Logger.shared.debug("Window not found. Searched for '\(searchTerm)' in \(appName). Available windows: \(availableTitles)")
            throw CaptureError.windowTitleNotFound(searchTerm, appName, availableTitles)
        }
        if let windowIndex {
            guard windows.indices.contains(windowIndex) else {
                throw CaptureError.invalidWindowIndex(windowIndex)
            }
            return windows[windowIndex]
        }
        return windows[0] // frontmost window
    }

    /// Captures every window of the application matching `appIdentifier`.
    ///
    /// If the identifier matches several applications, all windows of all matches
    /// are captured.
    ///
    /// - Throws: `CaptureError.appNotFound`, `.noWindowsFound`, or any error from the
    ///   permission check, window lookup or capture.
    private func captureAllApplicationWindows(_ appIdentifier: String) throws -> [SavedFile] {
        let targetApp: NSRunningApplication
        do {
            targetApp = try ApplicationFinder.findApplication(identifier: appIdentifier)
        } catch let ApplicationError.notFound(identifier) {
            throw CaptureError.appNotFound(identifier)
        } catch let ApplicationError.ambiguous(identifier, matches) {
            // For ambiguous matches, capture all windows from all matching applications.
            Logger.shared.debug("Multiple applications match '\(identifier)', capturing all windows from all matches")
            return try captureWindowsFromMultipleApps(matches, appIdentifier: identifier)
        }

        try activateIfNeeded(targetApp)

        let windows = try WindowManager.getWindowsForApp(pid: targetApp.processIdentifier)
        guard !windows.isEmpty else {
            throw CaptureError.noWindowsFound(targetApp.localizedName ?? appIdentifier)
        }

        var savedFiles: [SavedFile] = []
        for (index, window) in windows.enumerated() {
            let fileName = FileNameGenerator.generateFileName(
                appName: targetApp.localizedName, windowIndex: index, windowTitle: window.title, format: format
            )
            let filePath = OutputPathResolver.getOutputPath(basePath: path, fileName: fileName)
            try captureWindow(window, to: filePath)
            savedFiles.append(
                SavedFile(
                    path: filePath,
                    item_label: targetApp.localizedName,
                    window_title: window.title,
                    window_id: window.windowId,
                    window_index: index,
                    mime_type: mimeType
                )
            )
        }
        return savedFiles
    }

    /// Captures all windows of each application in `apps`.
    ///
    /// Window indices (used in file names and in `SavedFile.window_index`) are
    /// numbered continuously across all applications. Applications without windows
    /// are skipped.
    ///
    /// - Throws: `CaptureError.noWindowsFound` when none of the applications
    ///   produced a capture, or any error from the permission check, window lookup
    ///   or capture.
    private func captureWindowsFromMultipleApps(
        _ apps: [NSRunningApplication], appIdentifier: String
    ) throws -> [SavedFile] {
        var allSavedFiles: [SavedFile] = []
        var totalWindowIndex = 0

        for targetApp in apps {
            // Log which app we're processing.
            Logger.shared.debug("Capturing windows for app: \(targetApp.localizedName ?? "Unknown")")

            // Handle focus behavior for each app (if needed).
            try activateIfNeeded(targetApp)

            let windows = try WindowManager.getWindowsForApp(pid: targetApp.processIdentifier)
            if windows.isEmpty {
                Logger.shared.debug("No windows found for app: \(targetApp.localizedName ?? "Unknown")")
                continue
            }

            for window in windows {
                let fileName = FileNameGenerator.generateFileName(
                    appName: targetApp.localizedName,
                    windowIndex: totalWindowIndex,
                    windowTitle: window.title,
                    format: format
                )
                let filePath = OutputPathResolver.getOutputPath(basePath: path, fileName: fileName)
                try captureWindow(window, to: filePath)
                allSavedFiles.append(
                    SavedFile(
                        path: filePath,
                        item_label: targetApp.localizedName,
                        window_title: window.title,
                        window_id: window.windowId,
                        window_index: totalWindowIndex,
                        mime_type: mimeType
                    )
                )
                totalWindowIndex += 1
            }
        }

        guard !allSavedFiles.isEmpty else {
            throw CaptureError.noWindowsFound("No windows found for any matching applications of '\(appIdentifier)'")
        }
        return allSavedFiles
    }

    /// Activates `targetApp` when the focus setting asks for it.
    ///
    /// Activation happens for `.foreground`, and for `.auto` when the app is not
    /// already active. The accessibility permission is checked first, and the call
    /// pauses 0.2 s afterwards to give the activation time to take effect.
    private func activateIfNeeded(_ targetApp: NSRunningApplication) throws {
        guard captureFocus == .foreground || (captureFocus == .auto && !targetApp.isActive) else {
            return
        }
        try PermissionsChecker.requireAccessibilityPermission()
        targetApp.activate()
        Thread.sleep(forTimeInterval: 0.2) // Brief delay for activation
    }

    // MARK: - Async bridging

    /// Synchronously runs `ScreenCapture.captureDisplay` and normalises its errors.
    ///
    /// The async call runs in a `Task`; the calling thread blocks on a semaphore
    /// until it finishes.
    ///
    /// - Throws: the original `CaptureError` if one was thrown;
    ///   `.screenRecordingPermissionDenied` if `PermissionErrorDetector` classifies
    ///   the error as a permission problem; otherwise `.captureCreationFailed`.
    private func captureDisplay(_ displayID: CGDirectDisplayID, to path: String) throws(CaptureError) {
        do {
            let semaphore = DispatchSemaphore(value: 0)
            var captureError: Error?
            Task {
                // Signal on every exit path so the waiting thread is always released.
                defer { semaphore.signal() }
                do {
                    try await ScreenCapture.captureDisplay(displayID, to: path, format: format)
                } catch {
                    captureError = error
                }
            }
            semaphore.wait()
            if let captureError {
                throw captureError
            }
        } catch let error as CaptureError {
            // Re-throw CaptureError as-is.
            throw error
        } catch {
            // Check if this is a permission error from ScreenCaptureKit.
            if PermissionErrorDetector.isScreenRecordingPermissionError(error) {
                throw CaptureError.screenRecordingPermissionDenied
            }
            throw CaptureError.captureCreationFailed(error)
        }
    }

    /// Synchronously runs `ScreenCapture.captureWindow` and normalises its errors.
    ///
    /// The async call runs in a `Task`; the calling thread blocks on a semaphore
    /// until it finishes.
    ///
    /// - Throws: the original `CaptureError` if one was thrown;
    ///   `.screenRecordingPermissionDenied` if `PermissionErrorDetector` classifies
    ///   the error as a permission problem; otherwise `.windowCaptureFailed`.
    private func captureWindow(_ window: WindowData, to path: String) throws(CaptureError) {
        do {
            let semaphore = DispatchSemaphore(value: 0)
            var captureError: Error?
            Task {
                // Signal on every exit path so the waiting thread is always released.
                defer { semaphore.signal() }
                do {
                    try await ScreenCapture.captureWindow(window, to: path, format: format)
                } catch {
                    captureError = error
                }
            }
            semaphore.wait()
            if let captureError {
                throw captureError
            }
        } catch let error as CaptureError {
            // Re-throw CaptureError as-is.
            throw error
        } catch {
            // Check if this is a permission error from ScreenCaptureKit.
            if PermissionErrorDetector.isScreenRecordingPermissionError(error) {
                throw CaptureError.screenRecordingPermissionDenied
            }
            throw CaptureError.windowCaptureFailed(error)
        }
    }
}