diff --git a/mobile/openapi/README.md b/mobile/openapi/README.md
index 7442f0c75..02bb0bc0c 100644
Binary files a/mobile/openapi/README.md and b/mobile/openapi/README.md differ
diff --git a/mobile/openapi/lib/api.dart b/mobile/openapi/lib/api.dart
index e6d52b642..503a71ecb 100644
Binary files a/mobile/openapi/lib/api.dart and b/mobile/openapi/lib/api.dart differ
diff --git a/mobile/openapi/lib/api/assets_api.dart b/mobile/openapi/lib/api/assets_api.dart
index 063f9ea43..384fe0d72 100644
Binary files a/mobile/openapi/lib/api/assets_api.dart and b/mobile/openapi/lib/api/assets_api.dart differ
diff --git a/mobile/openapi/lib/api_client.dart b/mobile/openapi/lib/api_client.dart
index 43057f25a..b20c04a2b 100644
Binary files a/mobile/openapi/lib/api_client.dart and b/mobile/openapi/lib/api_client.dart differ
diff --git a/mobile/openapi/lib/model/asset_ocr_response_dto.dart b/mobile/openapi/lib/model/asset_ocr_response_dto.dart
new file mode 100644
index 000000000..c7937c6eb
Binary files /dev/null and b/mobile/openapi/lib/model/asset_ocr_response_dto.dart differ
diff --git a/open-api/immich-openapi-specs.json b/open-api/immich-openapi-specs.json
index bc81fad2a..29503b1ef 100644
--- a/open-api/immich-openapi-specs.json
+++ b/open-api/immich-openapi-specs.json
@@ -2491,6 +2491,53 @@
         "description": "This endpoint requires the `asset.read` permission."
       }
     },
+    "/assets/{id}/ocr": {
+      "get": {
+        "operationId": "getAssetOcr",
+        "parameters": [
+          {
+            "name": "id",
+            "required": true,
+            "in": "path",
+            "schema": {
+              "format": "uuid",
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "content": {
+              "application/json": {
+                "schema": {
+                  "items": {
+                    "$ref": "#/components/schemas/AssetOcrResponseDto"
+                  },
+                  "type": "array"
+                }
+              }
+            },
+            "description": ""
+          }
+        },
+        "security": [
+          {
+            "bearer": []
+          },
+          {
+            "cookie": []
+          },
+          {
+            "api_key": []
+          }
+        ],
+        "tags": [
+          "Assets"
+        ],
+        "x-immich-permission": "asset.read",
+        "description": "This endpoint requires the `asset.read` permission."
+      }
+    },
     "/assets/{id}/original": {
       "get": {
         "operationId": "downloadAsset",
@@ -11117,6 +11164,88 @@
         ],
         "type": "object"
       },
+      "AssetOcrResponseDto": {
+        "properties": {
+          "assetId": {
+            "format": "uuid",
+            "type": "string"
+          },
+          "boxScore": {
+            "description": "Confidence score for text detection box",
+            "format": "double",
+            "type": "number"
+          },
+          "id": {
+            "format": "uuid",
+            "type": "string"
+          },
+          "text": {
+            "description": "Recognized text",
+            "type": "string"
+          },
+          "textScore": {
+            "description": "Confidence score for text recognition",
+            "format": "double",
+            "type": "number"
+          },
+          "x1": {
+            "description": "Normalized x coordinate of box corner 1 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "x2": {
+            "description": "Normalized x coordinate of box corner 2 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "x3": {
+            "description": "Normalized x coordinate of box corner 3 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "x4": {
+            "description": "Normalized x coordinate of box corner 4 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "y1": {
+            "description": "Normalized y coordinate of box corner 1 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "y2": {
+            "description": "Normalized y coordinate of box corner 2 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "y3": {
+            "description": "Normalized y coordinate of box corner 3 (0-1)",
+            "format": "double",
+            "type": "number"
+          },
+          "y4": {
+            "description": "Normalized y coordinate of box corner 4 (0-1)",
+            "format": "double",
+            "type": "number"
+          }
+        },
+        "required": [
+          "assetId",
+          "boxScore",
+          "id",
+          "text",
+          "textScore",
+          "x1",
+          "x2",
+          "x3",
+          "x4",
+          "y1",
+          "y2",
+          "y3",
+          "y4"
+        ],
+        "type": "object"
+      },
       "AssetOrder": {
         "enum": [
           "asc",
diff --git a/open-api/typescript-sdk/src/fetch-client.ts b/open-api/typescript-sdk/src/fetch-client.ts
index a20fa9925..f4801a192 100644
--- a/open-api/typescript-sdk/src/fetch-client.ts
+++ b/open-api/typescript-sdk/src/fetch-client.ts
@@ -546,6 +546,32 @@ export type AssetMetadataResponseDto = {
 export type AssetMetadataUpsertDto = {
     items: AssetMetadataUpsertItemDto[];
 };
+export type AssetOcrResponseDto = {
+    assetId: string;
+    /** Confidence score for text detection box */
+    boxScore: number;
+    id: string;
+    /** Recognized text */
+    text: string;
+    /** Confidence score for text recognition */
+    textScore: number;
+    /** Normalized x coordinate of box corner 1 (0-1) */
+    x1: number;
+    /** Normalized x coordinate of box corner 2 (0-1) */
+    x2: number;
+    /** Normalized x coordinate of box corner 3 (0-1) */
+    x3: number;
+    /** Normalized x coordinate of box corner 4 (0-1) */
+    x4: number;
+    /** Normalized y coordinate of box corner 1 (0-1) */
+    y1: number;
+    /** Normalized y coordinate of box corner 2 (0-1) */
+    y2: number;
+    /** Normalized y coordinate of box corner 3 (0-1) */
+    y3: number;
+    /** Normalized y coordinate of box corner 4 (0-1) */
+    y4: number;
+};
 export type AssetMediaReplaceDto = {
     assetData: Blob;
     deviceAssetId: string;
@@ -2390,6 +2416,19 @@ export function getAssetMetadataByKey({ id, key }: {
         ...opts
     }));
 }
+/**
+ * This endpoint requires the `asset.read` permission.
+ */
+export function getAssetOcr({ id }: {
+    id: string;
+}, opts?: Oazapfts.RequestOpts) {
+    return oazapfts.ok(oazapfts.fetchJson<{
+        status: 200;
+        data: AssetOcrResponseDto[];
+    }>(`/assets/${encodeURIComponent(id)}/ocr`, {
+        ...opts
+    }));
+}
 /**
  * This endpoint requires the `asset.download` permission.
  */
diff --git a/server/src/controllers/asset.controller.ts b/server/src/controllers/asset.controller.ts
index 1f320f659..c57dc4ed2 100644
--- a/server/src/controllers/asset.controller.ts
+++ b/server/src/controllers/asset.controller.ts
@@ -16,6 +16,7 @@ import {
   UpdateAssetDto,
 } from 'src/dtos/asset.dto';
 import { AuthDto } from 'src/dtos/auth.dto';
+import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
 import { Permission, RouteKey } from 'src/enum';
 import { Auth, Authenticated } from 'src/middleware/auth.guard';
 import { AssetService } from 'src/services/asset.service';
@@ -95,6 +96,12 @@ export class AssetController {
     return this.service.getMetadata(auth, id);
   }
 
+  @Get(':id/ocr')
+  @Authenticated({ permission: Permission.AssetRead })
+  getAssetOcr(@Auth() auth: AuthDto, @Param() { id }: UUIDParamDto): Promise<AssetOcrResponseDto[]> {
+    return this.service.getOcr(auth, id);
+  }
+
   @Put(':id/metadata')
   @Authenticated({ permission: Permission.AssetUpdate })
   updateAssetMetadata(
diff --git a/server/src/dtos/ocr.dto.ts b/server/src/dtos/ocr.dto.ts
new file mode 100644
index 000000000..1e838d0ec
--- /dev/null
+++ b/server/src/dtos/ocr.dto.ts
@@ -0,0 +1,42 @@
+import { ApiProperty } from '@nestjs/swagger';
+
+export class AssetOcrResponseDto {
+  @ApiProperty({ type: 'string', format: 'uuid' })
+  id!: string;
+
+  @ApiProperty({ type: 'string', format: 'uuid' })
+  assetId!: string;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 1 (0-1)' })
+  x1!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 1 (0-1)' })
+  y1!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 2 (0-1)' })
+  x2!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 2 (0-1)' })
+  y2!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 3 (0-1)' })
+  x3!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 3 (0-1)' })
+  y3!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 4 (0-1)' })
+  x4!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 4 (0-1)' })
+  y4!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text detection box' })
+  boxScore!: number;
+
+  @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text recognition' })
+  textScore!: number;
+
+  @ApiProperty({ type: 'string', description: 'Recognized text' })
+  text!: string;
+}
diff --git a/server/src/services/asset.service.spec.ts b/server/src/services/asset.service.spec.ts
index 93861149c..4b0086c95 100755
--- a/server/src/services/asset.service.spec.ts
+++ b/server/src/services/asset.service.spec.ts
@@ -700,6 +700,42 @@ describe(AssetService.name, () => {
     });
   });
 
+  describe('getOcr', () => {
+    it('should require asset read permission', async () => {
+      mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set());
+
+      await expect(sut.getOcr(authStub.admin, 'asset-1')).rejects.toBeInstanceOf(BadRequestException);
+
+      expect(mocks.ocr.getByAssetId).not.toHaveBeenCalled();
+    });
+
+    it('should return OCR data for an asset', async () => {
+      const ocr1 = factory.assetOcr({ text: 'Hello World' });
+      const ocr2 = factory.assetOcr({ text: 'Test Image' });
+
+      mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
+      mocks.ocr.getByAssetId.mockResolvedValue([ocr1, ocr2]);
+
+      await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([ocr1, ocr2]);
+
+      expect(mocks.access.asset.checkOwnerAccess).toHaveBeenCalledWith(
+        authStub.admin.user.id,
+        new Set(['asset-1']),
+        undefined,
+      );
+      expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1');
+    });
+
+    it('should return empty array when no OCR data exists', async () => {
+      mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
+      mocks.ocr.getByAssetId.mockResolvedValue([]);
+
+      await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([]);
+
+      expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1');
+    });
+  });
+
   describe('run', () => {
     it('should run the refresh faces job', async () => {
       mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
diff --git a/server/src/services/asset.service.ts b/server/src/services/asset.service.ts
index 6cb021974..eb66c326e 100644
--- a/server/src/services/asset.service.ts
+++ b/server/src/services/asset.service.ts
@@ -16,6 +16,7 @@ import {
   mapStats,
 } from 'src/dtos/asset.dto';
 import { AuthDto } from 'src/dtos/auth.dto';
+import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
 import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum';
 import { BaseService } from 'src/services/base.service';
 import { ISidecarWriteJob, JobItem, JobOf } from 'src/types';
@@ -289,6 +290,11 @@ export class AssetService extends BaseService {
     return this.assetRepository.getMetadata(id);
   }
 
+  async getOcr(auth: AuthDto, id: string): Promise<AssetOcrResponseDto[]> {
+    await this.requireAccess({ auth, permission: Permission.AssetRead, ids: [id] });
+    return this.ocrRepository.getByAssetId(id);
+  }
+
   async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise {
     await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] });
     return this.assetRepository.upsertMetadata(id, dto.items);
diff --git a/server/test/small.factory.ts b/server/test/small.factory.ts
index 09e7988f8..ea0df585e 100644
--- a/server/test/small.factory.ts
+++ b/server/test/small.factory.ts
@@ -309,10 +309,44 @@ const assetSidecarWriteFactory = (asset: Partial = {}) => ({
   ...asset,
 });
 
+const assetOcrFactory = (
+  ocr: {
+    id?: string;
+    assetId?: string;
+    x1?: number;
+    y1?: number;
+    x2?: number;
+    y2?: number;
+    x3?: number;
+    y3?: number;
+    x4?: number;
+    y4?: number;
+    boxScore?: number;
+    textScore?: number;
+    text?: string;
+  } = {},
+) => ({
+  id: newUuid(),
+  assetId: newUuid(),
+  x1: 0.1,
+  y1: 0.2,
+  x2: 0.3,
+  y2: 0.2,
+  x3: 0.3,
+  y3: 0.4,
+  x4: 0.1,
+  y4: 0.4,
+  boxScore: 0.95,
+  textScore: 0.92,
+  text: 'Sample Text',
+  ...ocr,
+});
+
 export const factory = {
   activity: activityFactory,
   apiKey: apiKeyFactory,
   asset: assetFactory,
+  assetOcr: assetOcrFactory,
   auth: authFactory,
   authApiKey: authApiKeyFactory,
   authUser: authUserFactory,