From 9098717c559c8ece7c6bbfaa6efd9e8c7db9d4af Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 28 Oct 2025 15:57:03 -0500 Subject: [PATCH] feat: getAssetOcr endpoint (#23331) * feat: getAssetOcr endpoint * pr feedback --- mobile/openapi/README.md | Bin 41174 -> 41321 bytes mobile/openapi/lib/api.dart | Bin 14676 -> 14718 bytes mobile/openapi/lib/api/assets_api.dart | Bin 42101 -> 43858 bytes mobile/openapi/lib/api_client.dart | Bin 36454 -> 36544 bytes .../lib/model/asset_ocr_response_dto.dart | Bin 0 -> 5809 bytes open-api/immich-openapi-specs.json | 129 ++++++++++++++++++ open-api/typescript-sdk/src/fetch-client.ts | 39 ++++++ server/src/controllers/asset.controller.ts | 7 + server/src/dtos/ocr.dto.ts | 42 ++++++ server/src/services/asset.service.spec.ts | 36 +++++ server/src/services/asset.service.ts | 6 + server/test/small.factory.ts | 34 +++++ 12 files changed, 293 insertions(+) create mode 100644 mobile/openapi/lib/model/asset_ocr_response_dto.dart create mode 100644 server/src/dtos/ocr.dto.ts diff --git a/mobile/openapi/README.md b/mobile/openapi/README.md index 7442f0c750fd04597621d6b7c3547dbeaf089d18..02bb0bc0c0fd310c400d9c8fc53b68d3507ba8c2 100644 GIT binary patch delta 73 zcmcb1km=JP6qU delta 12 TcmexYbfsv+dA`j*`C7FAF=7U- diff --git a/mobile/openapi/lib/api/assets_api.dart b/mobile/openapi/lib/api/assets_api.dart index 063f9ea43b47484495f055adec1743ec7d70271f..384fe0d72ac1dcd0b1dab1d729310109bbe2f1e1 100644 GIT binary patch delta 107 zcmex*g6YyVrVZB|C;#^}0Aj9|(4 zX5wI)&)fn;Z}v0KW#R)ef>MhM^7D#QT}tvNX-jiB78j?MfDGFkSho3^%zOTgwVmfLYY(A#7AQ=GRBMlM& delta 14 WcmX>wm+9FYrVYonHY;k+PX+)qy#|H= diff --git a/mobile/openapi/lib/model/asset_ocr_response_dto.dart b/mobile/openapi/lib/model/asset_ocr_response_dto.dart new file mode 100644 index 0000000000000000000000000000000000000000..c7937c6eb22b1e6e9c2f1b86254bc802992f0e8b GIT binary patch literal 5809 zcmbVQZExE)5dNNDaVdhv##Cp1`B0>D)}l#=c5Mu#*?=Jk3`L@2cCzS^bYm{ff8X6v zk|mptlYK};-OF?2@!qVXqrIaec=yNU=+$p$=Vur1F3(Qk!^h8O5uA?Td~^w4MyDS? z{I!o_B>6UtwH@|4-Pe2cDsOY8#dxChcp@`#42yhWL^>9kn2Fp*<-45Cm9{~RIJ=9T z8P6AXti?YIsZl+TXZU{}YkXRv4T5VER!>xJY@FM%Owpj6NTFHX1dU}f7iOHP{1(OR zG|tDbe;y~(Sev~)G-p9gK+Q!S&t(Vy-t6rqS!@ivGe+1^qA!G*qd{?M72NLu9H9>` zgk9*|Kn%IKgR6k(HAvoMB08uO5NHt=KzV+!Q)xPASJ;nf6ky$`<<|rqAL7ZNh80U2 zk|z3AsAV^#vg`$^{UCJ^qPifi3*x#Uu4CMXHeL|GSF2@y3x&U!MECMu;Vhqvmx&>Djn@A*cljB1mPEynS1 z*c#n@!zc!2r$V=3cmloxCUMrH`Vp0!A3yvGWVnvKe2M7bWy+$1_+>spgJ0$d2~_Kb zgiMKt{~J`!JEZe-gTrY@YE?QkwU=FD;{S%`vd7fk8nw^V{u*__)IpQ#7#IIHa2?~~ z|0b1j-5RcA+`x6o+`_G}FpbUhy-EdM5WBf*n$#V;=bNl$H$dwJX#D_fut{?PJQu)o0X$d3?cp)WrT4!Jiyin%N{;^`0V4e zkIw-<2N5{rV~Ycad~9mqkdJK)9P-`Z`jC(EC7%D{cOF6NaNv>230U>Xs2q>t=Ya`U z{tUh$35KvCegE)=BLZ0BN*@--JG@5e`UJ5iRvvjJS5nplWx5ifK1fxfGDZ$svFR-! zzz%ClF=A@IWDaUfET|2ptJ$QU(PY+aQafleYc{FU zH<>k?R27J+c|hb_BEkdBEYBSrA!$gp!zo4AvPN6=aknrkt^S(wLYy-2XPm@##oF;3N1CSx4Hrz)kQ34OR>ZD ze!FAUDG!{j_6kR)JI!M{)-6-Y?bP8cUDx6i+_7O&^x3;@$II>x2+ZoOy1iYxz3saF zUAq14x`SQ1gRQ!5*Lks`Y&);pbzZmqyl&Td-S+dkUFUV%&RgW-11CvQz^IN?1+1tB zpC#sBAiGW=L(T`RPK~nQQ|X>(*;}pdV*1@Rgh`wk5j~Wd?(fZ+~RTs{+SzwiqVpA9BU5qf!r`!*t>_^|h_0>3vLnfp8igFSM+pleJ2R9YeWd6z(U*bzr%ys$pRB zCsg8sms0N5I|rr6@8x_FLMo>ZI(O-RwEn33S#IdBXItUM&RBd^I)>^4&^R^*KQ+-O zB|jcFt`e!*m6zF}Hz`bcAo8g{)RQiiu{tbTyP_SJi|)Geihm17(--Ue_*~s-dAE%l zM%5~%@Z|VPjhh{%{O`LvX{Sh{bfRwnfxCF+B~h^$3viA^4f;|?fEZSR=%9!SCKksC zaf5LZ!PAl#?bah`yS4ulM^6lqr-WNE1k?({&S=7;HE%m`B^|o1%7L|@imKdQWpPwR zTA&1=wOW82!R1s`u~2<4#IQ3s^2d1J3k;JR0XWtfG)q|#!>6yKbNcdFgaOZb)yKx) zNw-S&lY90^^;^PED2LVjQoQ3VQmlBwsIC3bP!MngC|A8|)vs2Aa)qt4ki$k;t|~cf MQbfa_J6P)e1BRgizW@LL literal 0 HcmV?d00001 diff --git a/open-api/immich-openapi-specs.json b/open-api/immich-openapi-specs.json index bc81fad2a..29503b1ef 100644 --- a/open-api/immich-openapi-specs.json +++ b/open-api/immich-openapi-specs.json @@ -2491,6 +2491,53 @@ "description": "This endpoint requires the `asset.read` permission." } }, + "/assets/{id}/ocr": { + "get": { + "operationId": "getAssetOcr", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "schema": { + "format": "uuid", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "items": { + "$ref": "#/components/schemas/AssetOcrResponseDto" + }, + "type": "array" + } + } + }, + "description": "" + } + }, + "security": [ + { + "bearer": [] + }, + { + "cookie": [] + }, + { + "api_key": [] + } + ], + "tags": [ + "Assets" + ], + "x-immich-permission": "asset.read", + "description": "This endpoint requires the `asset.read` permission." + } + }, "/assets/{id}/original": { "get": { "operationId": "downloadAsset", @@ -11117,6 +11164,88 @@ ], "type": "object" }, + "AssetOcrResponseDto": { + "properties": { + "assetId": { + "format": "uuid", + "type": "string" + }, + "boxScore": { + "description": "Confidence score for text detection box", + "format": "double", + "type": "number" + }, + "id": { + "format": "uuid", + "type": "string" + }, + "text": { + "description": "Recognized text", + "type": "string" + }, + "textScore": { + "description": "Confidence score for text recognition", + "format": "double", + "type": "number" + }, + "x1": { + "description": "Normalized x coordinate of box corner 1 (0-1)", + "format": "double", + "type": "number" + }, + "x2": { + "description": "Normalized x coordinate of box corner 2 (0-1)", + "format": "double", + "type": "number" + }, + "x3": { + "description": "Normalized x coordinate of box corner 3 (0-1)", + "format": "double", + "type": "number" + }, + "x4": { + "description": "Normalized x coordinate of box corner 4 (0-1)", + "format": "double", + "type": "number" + }, + "y1": { + "description": "Normalized y coordinate of box corner 1 (0-1)", + "format": "double", + "type": "number" + }, + "y2": { + "description": "Normalized y coordinate of box corner 2 (0-1)", + "format": "double", + "type": "number" + }, + "y3": { + "description": "Normalized y coordinate of box corner 3 (0-1)", + "format": "double", + "type": "number" + }, + "y4": { + "description": "Normalized y coordinate of box corner 4 (0-1)", + "format": "double", + "type": "number" + } + }, + "required": [ + "assetId", + "boxScore", + "id", + "text", + "textScore", + "x1", + "x2", + "x3", + "x4", + "y1", + "y2", + "y3", + "y4" + ], + "type": "object" + }, "AssetOrder": { "enum": [ "asc", diff --git a/open-api/typescript-sdk/src/fetch-client.ts b/open-api/typescript-sdk/src/fetch-client.ts index a20fa9925..f4801a192 100644 --- a/open-api/typescript-sdk/src/fetch-client.ts +++ b/open-api/typescript-sdk/src/fetch-client.ts @@ -546,6 +546,32 @@ export type AssetMetadataResponseDto = { export type AssetMetadataUpsertDto = { items: AssetMetadataUpsertItemDto[]; }; +export type AssetOcrResponseDto = { + assetId: string; + /** Confidence score for text detection box */ + boxScore: number; + id: string; + /** Recognized text */ + text: string; + /** Confidence score for text recognition */ + textScore: number; + /** Normalized x coordinate of box corner 1 (0-1) */ + x1: number; + /** Normalized x coordinate of box corner 2 (0-1) */ + x2: number; + /** Normalized x coordinate of box corner 3 (0-1) */ + x3: number; + /** Normalized x coordinate of box corner 4 (0-1) */ + x4: number; + /** Normalized y coordinate of box corner 1 (0-1) */ + y1: number; + /** Normalized y coordinate of box corner 2 (0-1) */ + y2: number; + /** Normalized y coordinate of box corner 3 (0-1) */ + y3: number; + /** Normalized y coordinate of box corner 4 (0-1) */ + y4: number; +}; export type AssetMediaReplaceDto = { assetData: Blob; deviceAssetId: string; @@ -2390,6 +2416,19 @@ export function getAssetMetadataByKey({ id, key }: { ...opts })); } +/** + * This endpoint requires the `asset.read` permission. + */ +export function getAssetOcr({ id }: { + id: string; +}, opts?: Oazapfts.RequestOpts) { + return oazapfts.ok(oazapfts.fetchJson<{ + status: 200; + data: AssetOcrResponseDto[]; + }>(`/assets/${encodeURIComponent(id)}/ocr`, { + ...opts + })); +} /** * This endpoint requires the `asset.download` permission. */ diff --git a/server/src/controllers/asset.controller.ts b/server/src/controllers/asset.controller.ts index 1f320f659..c57dc4ed2 100644 --- a/server/src/controllers/asset.controller.ts +++ b/server/src/controllers/asset.controller.ts @@ -16,6 +16,7 @@ import { UpdateAssetDto, } from 'src/dtos/asset.dto'; import { AuthDto } from 'src/dtos/auth.dto'; +import { AssetOcrResponseDto } from 'src/dtos/ocr.dto'; import { Permission, RouteKey } from 'src/enum'; import { Auth, Authenticated } from 'src/middleware/auth.guard'; import { AssetService } from 'src/services/asset.service'; @@ -95,6 +96,12 @@ export class AssetController { return this.service.getMetadata(auth, id); } + @Get(':id/ocr') + @Authenticated({ permission: Permission.AssetRead }) + getAssetOcr(@Auth() auth: AuthDto, @Param() { id }: UUIDParamDto): Promise { + return this.service.getOcr(auth, id); + } + @Put(':id/metadata') @Authenticated({ permission: Permission.AssetUpdate }) updateAssetMetadata( diff --git a/server/src/dtos/ocr.dto.ts b/server/src/dtos/ocr.dto.ts new file mode 100644 index 000000000..1e838d0ec --- /dev/null +++ b/server/src/dtos/ocr.dto.ts @@ -0,0 +1,42 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class AssetOcrResponseDto { + @ApiProperty({ type: 'string', format: 'uuid' }) + id!: string; + + @ApiProperty({ type: 'string', format: 'uuid' }) + assetId!: string; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 1 (0-1)' }) + x1!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 1 (0-1)' }) + y1!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 2 (0-1)' }) + x2!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 2 (0-1)' }) + y2!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 3 (0-1)' }) + x3!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 3 (0-1)' }) + y3!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 4 (0-1)' }) + x4!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 4 (0-1)' }) + y4!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text detection box' }) + boxScore!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text recognition' }) + textScore!: number; + + @ApiProperty({ type: 'string', description: 'Recognized text' }) + text!: string; +} diff --git a/server/src/services/asset.service.spec.ts b/server/src/services/asset.service.spec.ts index 93861149c..4b0086c95 100755 --- a/server/src/services/asset.service.spec.ts +++ b/server/src/services/asset.service.spec.ts @@ -700,6 +700,42 @@ describe(AssetService.name, () => { }); }); + describe('getOcr', () => { + it('should require asset read permission', async () => { + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set()); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).rejects.toBeInstanceOf(BadRequestException); + + expect(mocks.ocr.getByAssetId).not.toHaveBeenCalled(); + }); + + it('should return OCR data for an asset', async () => { + const ocr1 = factory.assetOcr({ text: 'Hello World' }); + const ocr2 = factory.assetOcr({ text: 'Test Image' }); + + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); + mocks.ocr.getByAssetId.mockResolvedValue([ocr1, ocr2]); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([ocr1, ocr2]); + + expect(mocks.access.asset.checkOwnerAccess).toHaveBeenCalledWith( + authStub.admin.user.id, + new Set(['asset-1']), + undefined, + ); + expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1'); + }); + + it('should return empty array when no OCR data exists', async () => { + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); + mocks.ocr.getByAssetId.mockResolvedValue([]); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([]); + + expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1'); + }); + }); + describe('run', () => { it('should run the refresh faces job', async () => { mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); diff --git a/server/src/services/asset.service.ts b/server/src/services/asset.service.ts index 6cb021974..eb66c326e 100644 --- a/server/src/services/asset.service.ts +++ b/server/src/services/asset.service.ts @@ -16,6 +16,7 @@ import { mapStats, } from 'src/dtos/asset.dto'; import { AuthDto } from 'src/dtos/auth.dto'; +import { AssetOcrResponseDto } from 'src/dtos/ocr.dto'; import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum'; import { BaseService } from 'src/services/base.service'; import { ISidecarWriteJob, JobItem, JobOf } from 'src/types'; @@ -289,6 +290,11 @@ export class AssetService extends BaseService { return this.assetRepository.getMetadata(id); } + async getOcr(auth: AuthDto, id: string): Promise { + await this.requireAccess({ auth, permission: Permission.AssetRead, ids: [id] }); + return this.ocrRepository.getByAssetId(id); + } + async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise { await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] }); return this.assetRepository.upsertMetadata(id, dto.items); diff --git a/server/test/small.factory.ts b/server/test/small.factory.ts index 09e7988f8..ea0df585e 100644 --- a/server/test/small.factory.ts +++ b/server/test/small.factory.ts @@ -309,10 +309,44 @@ const assetSidecarWriteFactory = (asset: Partial = {}) => ({ ...asset, }); +const assetOcrFactory = ( + ocr: { + id?: string; + assetId?: string; + x1?: number; + y1?: number; + x2?: number; + y2?: number; + x3?: number; + y3?: number; + x4?: number; + y4?: number; + boxScore?: number; + textScore?: number; + text?: string; + } = {}, +) => ({ + id: newUuid(), + assetId: newUuid(), + x1: 0.1, + y1: 0.2, + x2: 0.3, + y2: 0.2, + x3: 0.3, + y3: 0.4, + x4: 0.1, + y4: 0.4, + boxScore: 0.95, + textScore: 0.92, + text: 'Sample Text', + ...ocr, +}); + export const factory = { activity: activityFactory, apiKey: apiKeyFactory, asset: assetFactory, + assetOcr: assetOcrFactory, auth: authFactory, authApiKey: authApiKeyFactory, authUser: authUserFactory,