fix: Add defensive validation for invalid image formats with automatic PNG fallback

Implements robust handling for invalid image formats (like 'bmp', 'gif', 'webp') that bypass schema validation:

- Added defensive format validation in image tool handler
- Automatic path correction to ensure file extensions match actual format used
- Warning messages in response when format fallback occurs
- Comprehensive unit and integration test coverage for edge cases

This ensures invalid formats automatically fall back to PNG as requested, preventing
Swift CLI rejection and incorrect file extensions in output paths.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Peter Steinberger 2025-06-08 07:44:17 +01:00
parent 977c22d37a
commit ab882069b4
5 changed files with 514 additions and 2 deletions

View file

@ -10,6 +10,7 @@ import { performAutomaticAnalysis } from "../utils/image-analysis.js";
import { buildImageSummary } from "../utils/image-summary.js";
import { buildSwiftCliArgs, resolveImagePath } from "../utils/image-cli-args.js";
import { parseAIProviders } from "../utils/ai-providers.js";
import * as path from "path";
export { imageToolSchema } from "../types/index.js";
@ -36,6 +37,18 @@ export async function imageToolHandler(
// The format here is already normalized (lowercase, jpeg->jpg mapping applied)
let effectiveFormat = input.format;
// Defensive validation: ensure format is one of the valid values
// This should not be necessary due to schema preprocessing, but provides extra safety
const validFormats = ["png", "jpg", "data"];
if (effectiveFormat && !validFormats.includes(effectiveFormat)) {
logger.warn(
{ originalFormat: effectiveFormat, fallbackFormat: "png" },
`Invalid format '${effectiveFormat}' detected, falling back to PNG`
);
effectiveFormat = "png";
formatWarning = `Invalid format '${input.format}' was provided. Automatically using PNG format instead.`;
}
// Auto-fallback to PNG for screen captures with format 'data'
if (isScreenCapture && effectiveFormat === "data") {
logger.warn("Screen capture with format 'data' auto-fallback to PNG due to size constraints");
@ -46,11 +59,38 @@ export async function imageToolHandler(
// Determine effective path and format for Swift CLI
const swiftFormat = effectiveFormat === "data" ? "png" : (effectiveFormat || "png");
// Create a corrected input object if format or path needs to be adjusted
let correctedInput = input;
// If format was corrected and we have a path, update the file extension to match the actual format
if (input.format && input.format !== effectiveFormat && input.path) {
const originalPath = input.path;
const parsedPath = path.parse(originalPath);
// Map format to appropriate extension
const extensionMap: { [key: string]: string } = {
"png": ".png",
"jpg": ".jpg",
"jpeg": ".jpg",
"data": ".png" // data format saves as PNG
};
const newExtension = extensionMap[effectiveFormat || "png"] || ".png";
const correctedPath = path.join(parsedPath.dir, parsedPath.name + newExtension);
logger.debug(
{ originalPath, correctedPath, originalFormat: input.format, correctedFormat: effectiveFormat },
"Correcting file extension to match format"
);
correctedInput = { ...input, path: correctedPath };
}
// Resolve the effective path using the centralized logic
const { effectivePath, tempDirUsed: tempDir } = await resolveImagePath(input, logger);
const { effectivePath, tempDirUsed: tempDir } = await resolveImagePath(correctedInput, logger);
_tempDirUsed = tempDir;
const args = buildSwiftCliArgs(input, effectivePath, swiftFormat, logger);
const args = buildSwiftCliArgs(correctedInput, effectivePath, swiftFormat, logger);
const swiftResponse = await executeSwiftCli(args, logger, { timeout: 30000 });

View file

@ -0,0 +1,94 @@
import { describe, it, expect, beforeEach } from "vitest";
import { imageToolHandler } from "../../src/tools/image";
import { initializeSwiftCliPath } from "../../src/utils/peekaboo-cli";
import { pino } from "pino";
import * as fs from "fs/promises";
import * as path from "path";
import * as os from "os";
// Initialize Swift CLI path (assuming 'peekaboo' binary is at project root)
const packageRootDir = path.resolve(__dirname, "..", ".."); // Adjust path from tests/integration to project root
initializeSwiftCliPath(packageRootDir);
const mockLogger = pino({ level: "silent" });
const mockContext = { logger: mockLogger };
// Conditionally skip Swift-dependent tests on non-macOS platforms
const describeSwiftTests = globalThis.shouldSkipSwiftTests ? describe.skip : describe;
describeSwiftTests("Invalid Format Integration Tests", () => {
let tempDir: string;
beforeEach(async () => {
// Create a temporary directory for test files
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "peekaboo-invalid-format-test-"));
});
it("should reject invalid format gracefully and not create files with wrong extensions", async () => {
const testPath = path.join(tempDir, "test_invalid_format.bmp");
// Test with invalid format 'bmp'
const result = await imageToolHandler(
{
format: "bmp",
path: testPath,
},
mockContext,
);
// Check what error we got
console.log("Result:", JSON.stringify(result, null, 2));
// The tool should succeed (format gets preprocessed to png)
expect(result.isError).toBeUndefined();
// Check if any files were created
if (result.saved_files && result.saved_files.length > 0) {
for (const savedFile of result.saved_files) {
// The actual saved file should have .png extension, not .bmp
expect(savedFile.path).not.toContain(".bmp");
expect(savedFile.path).toMatch(/\.png$/);
// Verify the file actually exists with the correct extension
const fileExists = await fs.access(savedFile.path).then(() => true).catch(() => false);
expect(fileExists).toBe(true);
// Verify no .bmp file was created
const bmpPath = savedFile.path.replace(/\.png$/, ".bmp");
const bmpExists = await fs.access(bmpPath).then(() => true).catch(() => false);
expect(bmpExists).toBe(false);
}
}
// The result content should not mention .bmp files
const resultText = result.content[0]?.text || "";
expect(resultText).not.toContain(".bmp");
});
it("should handle various invalid formats consistently", async () => {
const invalidFormats = ["gif", "webp", "tiff", "bmp", "xyz"];
for (const format of invalidFormats) {
const testPath = path.join(tempDir, `test_${format}.${format}`);
const result = await imageToolHandler(
{
format: format as any,
path: testPath,
},
mockContext,
);
// Should succeed with fallback
expect(result.isError).toBeUndefined();
if (result.saved_files && result.saved_files.length > 0) {
// All files should be saved as PNG regardless of input format
for (const savedFile of result.saved_files) {
expect(savedFile.path).toMatch(/\.png$/);
expect(savedFile.mime_type).toBe("image/png");
}
}
}
});
});

View file

@ -0,0 +1,155 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { imageToolHandler } from "../../../src/tools/image";
import { executeSwiftCli } from "../../../src/utils/peekaboo-cli";
import { resolveImagePath } from "../../../src/utils/image-cli-args";
import { mockSwiftCli } from "../../mocks/peekaboo-cli.mock";
import { pino } from "pino";
// Mock the Swift CLI utility
vi.mock("../../../src/utils/peekaboo-cli");
// Mock image-cli-args module
vi.mock("../../../src/utils/image-cli-args", async () => {
const actual = await vi.importActual("../../../src/utils/image-cli-args");
return {
...actual,
resolveImagePath: vi.fn(),
};
});
const mockExecuteSwiftCli = executeSwiftCli as vi.MockedFunction<typeof executeSwiftCli>;
const mockResolveImagePath = resolveImagePath as vi.MockedFunction<typeof resolveImagePath>;
const mockLogger = pino({ level: "silent" });
const mockContext = { logger: mockLogger };
const MOCK_TEMP_DIR = "/tmp/peekaboo-img-XXXXXX";
describe("Defensive Format Validation", () => {
beforeEach(() => {
vi.clearAllMocks();
mockResolveImagePath.mockResolvedValue({
effectivePath: MOCK_TEMP_DIR,
tempDirUsed: MOCK_TEMP_DIR,
});
});
it("should catch and fix invalid formats that bypass schema preprocessing", async () => {
// Mock Swift CLI response
const mockResponse = mockSwiftCli.captureImage("screen", {
path: "/tmp/test.png",
format: "png",
});
mockExecuteSwiftCli.mockResolvedValue(mockResponse);
// Create a spy on logger.warn to check if defensive validation triggers
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
// Simulate a scenario where somehow an invalid format gets through
// (this should not happen with proper schema validation, but this is defensive)
const inputWithInvalidFormat = {
format: "bmp" as any, // Force an invalid format
path: "/tmp/test.bmp",
};
// Bypass schema validation by calling the handler directly
const result = await imageToolHandler(inputWithInvalidFormat, mockContext);
// Should succeed with PNG fallback
expect(result.isError).toBeUndefined();
// Should have logged a warning about the invalid format
expect(loggerWarnSpy).toHaveBeenCalledWith(
{ originalFormat: "bmp", fallbackFormat: "png" },
"Invalid format 'bmp' detected, falling back to PNG"
);
// Should call Swift CLI with PNG format, not BMP
expect(mockExecuteSwiftCli).toHaveBeenCalledWith(
expect.arrayContaining(["--format", "png"]),
mockLogger,
expect.objectContaining({ timeout: expect.any(Number) })
);
// Should not contain BMP in the arguments
const swiftCliCall = mockExecuteSwiftCli.mock.calls[0][0];
expect(swiftCliCall).not.toContain("bmp");
// Should include a warning in the response content
const allResponseText = result.content.map(item => item.text || "").join(" ");
expect(allResponseText).toContain("Invalid format 'bmp' was provided");
expect(allResponseText).toContain("Automatically using PNG format instead");
});
it("should not trigger defensive validation for valid formats", async () => {
const mockResponse = mockSwiftCli.captureImage("screen", {
path: "/tmp/test.png",
format: "png",
});
mockExecuteSwiftCli.mockResolvedValue(mockResponse);
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
// Use a valid format
const inputWithValidFormat = {
format: "png" as any,
path: "/tmp/test.png",
};
const result = await imageToolHandler(inputWithValidFormat, mockContext);
// Should succeed
expect(result.isError).toBeUndefined();
// Should NOT have logged any warning about invalid format
expect(loggerWarnSpy).not.toHaveBeenCalledWith(
expect.objectContaining({ originalFormat: expect.any(String) }),
expect.stringContaining("Invalid format")
);
// Response should not contain format warning
const allResponseText = result.content.map(item => item.text || "").join(" ");
expect(allResponseText).not.toContain("Invalid format");
expect(allResponseText).not.toContain("Automatically using PNG format instead");
});
it("should handle various invalid formats defensively", async () => {
const mockResponse = mockSwiftCli.captureImage("screen", {
path: "/tmp/test.png",
format: "png",
});
mockExecuteSwiftCli.mockResolvedValue(mockResponse);
const invalidFormats = ["bmp", "gif", "webp", "tiff", "svg", "raw"];
for (const invalidFormat of invalidFormats) {
vi.clearAllMocks();
const loggerWarnSpy = vi.spyOn(mockLogger, "warn");
const input = {
format: invalidFormat as any,
path: `/tmp/test.${invalidFormat}`,
};
const result = await imageToolHandler(input, mockContext);
// Should succeed with PNG fallback
expect(result.isError).toBeUndefined();
// Should have logged warning
expect(loggerWarnSpy).toHaveBeenCalledWith(
{ originalFormat: invalidFormat, fallbackFormat: "png" },
`Invalid format '${invalidFormat}' detected, falling back to PNG`
);
// Should call Swift CLI with PNG
expect(mockExecuteSwiftCli).toHaveBeenCalledWith(
expect.arrayContaining(["--format", "png"]),
mockLogger,
expect.objectContaining({ timeout: expect.any(Number) })
);
}
});
});

View file

@ -0,0 +1,136 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { imageToolHandler } from "../../../src/tools/image";
import { executeSwiftCli, readImageAsBase64 } from "../../../src/utils/peekaboo-cli";
import { resolveImagePath } from "../../../src/utils/image-cli-args";
import { mockSwiftCli } from "../../mocks/peekaboo-cli.mock";
import { pino } from "pino";
// Mock the Swift CLI utility
vi.mock("../../../src/utils/peekaboo-cli");
// Mock fs/promises
vi.mock("fs/promises");
// Mock image-cli-args module
vi.mock("../../../src/utils/image-cli-args", async () => {
const actual = await vi.importActual("../../../src/utils/image-cli-args");
return {
...actual,
resolveImagePath: vi.fn(),
};
});
const mockExecuteSwiftCli = executeSwiftCli as vi.MockedFunction<
typeof executeSwiftCli
>;
const mockResolveImagePath = resolveImagePath as vi.MockedFunction<typeof resolveImagePath>;
const mockLogger = pino({ level: "silent" });
const mockContext = { logger: mockLogger };
const MOCK_TEMP_DIR = "/tmp/peekaboo-img-XXXXXX";
const MOCK_SAVED_FILE_PATH = "/tmp/test_invalid_format.png";
describe("Invalid Format Handling", () => {
beforeEach(() => {
vi.clearAllMocks();
// Mock resolveImagePath to return a temp directory
mockResolveImagePath.mockResolvedValue({
effectivePath: MOCK_TEMP_DIR,
tempDirUsed: MOCK_TEMP_DIR,
});
});
it("should fallback invalid format 'bmp' to 'png' and use correct extension in filename", async () => {
// Import schema to test preprocessing
const { imageToolSchema } = await import("../../../src/types/index.js");
// Mock Swift CLI response with PNG format (fallback)
const mockResponse = mockSwiftCli.captureImage("screen", {
path: MOCK_SAVED_FILE_PATH,
format: "png",
});
// Ensure the saved file has .png extension, not .bmp
const savedFileWithCorrectExtension = {
...mockResponse.data.saved_files[0],
path: "/tmp/test_invalid_format.png", // Should be .png, not .bmp
};
const correctedResponse = {
...mockResponse,
data: {
...mockResponse.data,
saved_files: [savedFileWithCorrectExtension],
},
};
mockExecuteSwiftCli.mockResolvedValue(correctedResponse);
// Test with invalid format 'bmp' - schema should preprocess to 'png'
const parsedInput = imageToolSchema.parse({
format: "bmp",
path: "/tmp/test_invalid_format.bmp"
});
// Validate that schema preprocessing worked
expect(parsedInput.format).toBe("png");
const result = await imageToolHandler(parsedInput, mockContext);
expect(result.isError).toBeUndefined();
// Should have called Swift CLI with PNG format, not BMP
expect(mockExecuteSwiftCli).toHaveBeenCalledWith(
expect.arrayContaining(["--format", "png"]),
mockLogger,
expect.objectContaining({ timeout: expect.any(Number) }),
);
// The saved file should have .png extension
expect(result.saved_files?.[0]?.path).toBe("/tmp/test_invalid_format.png");
expect(result.saved_files?.[0]?.path).not.toContain(".bmp");
// The result should not contain .bmp anywhere
const resultText = result.content[0]?.text || "";
expect(resultText).not.toContain(".bmp");
expect(resultText).toContain(".png");
});
it("should handle other invalid formats correctly", async () => {
const { imageToolSchema } = await import("../../../src/types/index.js");
const invalidFormats = ["gif", "webp", "tiff", "xyz", "bmp"];
for (const invalidFormat of invalidFormats) {
const parsedInput = imageToolSchema.parse({ format: invalidFormat });
// All invalid formats should be preprocessed to 'png'
expect(parsedInput.format).toBe("png");
}
// Empty string should become undefined (which will use default)
const emptyInput = imageToolSchema.parse({ format: "" });
expect(emptyInput.format).toBeUndefined();
});
it("should preserve valid formats", async () => {
const { imageToolSchema } = await import("../../../src/types/index.js");
const validFormats = [
{ input: "png", expected: "png" },
{ input: "PNG", expected: "png" }, // case insensitive
{ input: "jpg", expected: "jpg" },
{ input: "JPG", expected: "jpg" }, // case insensitive
{ input: "jpeg", expected: "jpg" }, // alias
{ input: "JPEG", expected: "jpg" }, // alias + case insensitive
{ input: "data", expected: "data" },
];
for (const { input, expected } of validFormats) {
const parsedInput = imageToolSchema.parse({ format: input });
expect(parsedInput.format).toBe(expected);
}
});
});

View file

@ -0,0 +1,87 @@
import { describe, it, expect } from "vitest";
import { buildSwiftCliArgs } from "../../../src/utils/image-cli-args";
describe("Format Preprocessing in Swift CLI Args", () => {
it("should use preprocessed format in Swift CLI arguments", async () => {
// Import and test the schema preprocessing
const { imageToolSchema } = await import("../../../src/types/index.js");
// Test that invalid format gets preprocessed correctly
const inputWithInvalidFormat = { format: "bmp", path: "/tmp/test.png" };
const preprocessedInput = imageToolSchema.parse(inputWithInvalidFormat);
// Verify preprocessing worked
expect(preprocessedInput.format).toBe("png");
// Test that buildSwiftCliArgs uses the preprocessed format
const swiftArgs = buildSwiftCliArgs(preprocessedInput, "/tmp/test.png");
// Should contain --format png, not --format bmp
expect(swiftArgs).toContain("--format");
const formatIndex = swiftArgs.indexOf("--format");
expect(swiftArgs[formatIndex + 1]).toBe("png");
expect(swiftArgs).not.toContain("bmp");
});
it("should handle various invalid formats consistently", async () => {
const { imageToolSchema } = await import("../../../src/types/index.js");
const invalidFormats = ["bmp", "gif", "webp", "tiff", "xyz"];
for (const invalidFormat of invalidFormats) {
const input = { format: invalidFormat, path: "/tmp/test.png" };
const preprocessedInput = imageToolSchema.parse(input);
// All should be preprocessed to png
expect(preprocessedInput.format).toBe("png");
const swiftArgs = buildSwiftCliArgs(preprocessedInput, "/tmp/test.png");
const formatIndex = swiftArgs.indexOf("--format");
// All should result in --format png
expect(swiftArgs[formatIndex + 1]).toBe("png");
expect(swiftArgs).not.toContain(invalidFormat);
}
});
it("should pass through valid formats correctly", async () => {
const { imageToolSchema } = await import("../../../src/types/index.js");
const validCases = [
{ input: "png", expected: "png" },
{ input: "PNG", expected: "png" },
{ input: "jpg", expected: "jpg" },
{ input: "JPG", expected: "jpg" },
{ input: "jpeg", expected: "jpg" },
{ input: "JPEG", expected: "jpg" },
];
for (const { input, expected } of validCases) {
const inputObj = { format: input, path: "/tmp/test.png" };
const preprocessedInput = imageToolSchema.parse(inputObj);
expect(preprocessedInput.format).toBe(expected);
const swiftArgs = buildSwiftCliArgs(preprocessedInput, "/tmp/test.png");
const formatIndex = swiftArgs.indexOf("--format");
expect(swiftArgs[formatIndex + 1]).toBe(expected);
}
});
it("should handle data format for Swift CLI (converts to png)", async () => {
const { imageToolSchema } = await import("../../../src/types/index.js");
const input = { format: "data", path: "/tmp/test.png" };
const preprocessedInput = imageToolSchema.parse(input);
expect(preprocessedInput.format).toBe("data");
// buildSwiftCliArgs should convert data format to png for Swift CLI
const swiftArgs = buildSwiftCliArgs(preprocessedInput, "/tmp/test.png");
const formatIndex = swiftArgs.indexOf("--format");
// Should be converted to png for Swift CLI
expect(swiftArgs[formatIndex + 1]).toBe("png");
});
});