mirror of
https://github.com/samsonjs/immich.git
synced 2026-04-27 15:07:45 +00:00
feat: getAssetOcr endpoint (#23331)
* feat: getAssetOcr endpoint * pr feedback
This commit is contained in:
parent
8d25f81bec
commit
9098717c55
12 changed files with 293 additions and 0 deletions
BIN
mobile/openapi/README.md
generated
BIN
mobile/openapi/README.md
generated
Binary file not shown.
BIN
mobile/openapi/lib/api.dart
generated
BIN
mobile/openapi/lib/api.dart
generated
Binary file not shown.
BIN
mobile/openapi/lib/api/assets_api.dart
generated
BIN
mobile/openapi/lib/api/assets_api.dart
generated
Binary file not shown.
BIN
mobile/openapi/lib/api_client.dart
generated
BIN
mobile/openapi/lib/api_client.dart
generated
Binary file not shown.
BIN
mobile/openapi/lib/model/asset_ocr_response_dto.dart
generated
Normal file
BIN
mobile/openapi/lib/model/asset_ocr_response_dto.dart
generated
Normal file
Binary file not shown.
|
|
@ -2491,6 +2491,53 @@
|
||||||
"description": "This endpoint requires the `asset.read` permission."
|
"description": "This endpoint requires the `asset.read` permission."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/assets/{id}/ocr": {
|
||||||
|
"get": {
|
||||||
|
"operationId": "getAssetOcr",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "id",
|
||||||
|
"required": true,
|
||||||
|
"in": "path",
|
||||||
|
"schema": {
|
||||||
|
"format": "uuid",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/AssetOcrResponseDto"
|
||||||
|
},
|
||||||
|
"type": "array"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": ""
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"bearer": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cookie": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"api_key": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"Assets"
|
||||||
|
],
|
||||||
|
"x-immich-permission": "asset.read",
|
||||||
|
"description": "This endpoint requires the `asset.read` permission."
|
||||||
|
}
|
||||||
|
},
|
||||||
"/assets/{id}/original": {
|
"/assets/{id}/original": {
|
||||||
"get": {
|
"get": {
|
||||||
"operationId": "downloadAsset",
|
"operationId": "downloadAsset",
|
||||||
|
|
@ -11117,6 +11164,88 @@
|
||||||
],
|
],
|
||||||
"type": "object"
|
"type": "object"
|
||||||
},
|
},
|
||||||
|
"AssetOcrResponseDto": {
|
||||||
|
"properties": {
|
||||||
|
"assetId": {
|
||||||
|
"format": "uuid",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"boxScore": {
|
||||||
|
"description": "Confidence score for text detection box",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"format": "uuid",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"description": "Recognized text",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"textScore": {
|
||||||
|
"description": "Confidence score for text recognition",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"x1": {
|
||||||
|
"description": "Normalized x coordinate of box corner 1 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"x2": {
|
||||||
|
"description": "Normalized x coordinate of box corner 2 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"x3": {
|
||||||
|
"description": "Normalized x coordinate of box corner 3 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"x4": {
|
||||||
|
"description": "Normalized x coordinate of box corner 4 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"y1": {
|
||||||
|
"description": "Normalized y coordinate of box corner 1 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"y2": {
|
||||||
|
"description": "Normalized y coordinate of box corner 2 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"y3": {
|
||||||
|
"description": "Normalized y coordinate of box corner 3 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"y4": {
|
||||||
|
"description": "Normalized y coordinate of box corner 4 (0-1)",
|
||||||
|
"format": "double",
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"assetId",
|
||||||
|
"boxScore",
|
||||||
|
"id",
|
||||||
|
"text",
|
||||||
|
"textScore",
|
||||||
|
"x1",
|
||||||
|
"x2",
|
||||||
|
"x3",
|
||||||
|
"x4",
|
||||||
|
"y1",
|
||||||
|
"y2",
|
||||||
|
"y3",
|
||||||
|
"y4"
|
||||||
|
],
|
||||||
|
"type": "object"
|
||||||
|
},
|
||||||
"AssetOrder": {
|
"AssetOrder": {
|
||||||
"enum": [
|
"enum": [
|
||||||
"asc",
|
"asc",
|
||||||
|
|
|
||||||
|
|
@ -546,6 +546,32 @@ export type AssetMetadataResponseDto = {
|
||||||
export type AssetMetadataUpsertDto = {
|
export type AssetMetadataUpsertDto = {
|
||||||
items: AssetMetadataUpsertItemDto[];
|
items: AssetMetadataUpsertItemDto[];
|
||||||
};
|
};
|
||||||
|
/**
 * One OCR entry returned for an asset: the recognized text, the four corner
 * points of its bounding box and the two confidence scores.
 */
export type AssetOcrResponseDto = {
    /** Identifier of the asset this entry belongs to (UUID-formatted string in the OpenAPI schema). */
    assetId: string;
    /** Confidence score for text detection box */
    boxScore: number;
    /** Identifier of this OCR entry (UUID-formatted string in the OpenAPI schema). */
    id: string;
    /** Recognized text */
    text: string;
    /** Confidence score for text recognition */
    textScore: number;
    /** Normalized x coordinate of box corner 1 (0-1) */
    x1: number;
    /** Normalized x coordinate of box corner 2 (0-1) */
    x2: number;
    /** Normalized x coordinate of box corner 3 (0-1) */
    x3: number;
    /** Normalized x coordinate of box corner 4 (0-1) */
    x4: number;
    /** Normalized y coordinate of box corner 1 (0-1) */
    y1: number;
    /** Normalized y coordinate of box corner 2 (0-1) */
    y2: number;
    /** Normalized y coordinate of box corner 3 (0-1) */
    y3: number;
    /** Normalized y coordinate of box corner 4 (0-1) */
    y4: number;
};
|
||||||
export type AssetMediaReplaceDto = {
|
export type AssetMediaReplaceDto = {
|
||||||
assetData: Blob;
|
assetData: Blob;
|
||||||
deviceAssetId: string;
|
deviceAssetId: string;
|
||||||
|
|
@ -2390,6 +2416,19 @@ export function getAssetMetadataByKey({ id, key }: {
|
||||||
...opts
|
...opts
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
/**
 * Requests `/assets/{id}/ocr` and resolves with the asset's OCR entries.
 *
 * This endpoint requires the `asset.read` permission.
 *
 * @param id   Asset identifier; it is URI-encoded before being placed in the path.
 * @param opts Optional request options, spread unchanged into the fetch call.
 * @returns The `data` payload of the 200 response, unwrapped by `oazapfts.ok`.
 */
export function getAssetOcr({ id }: {
    id: string;
}, opts?: Oazapfts.RequestOpts) {
    return oazapfts.ok(oazapfts.fetchJson<{
        status: 200;
        data: AssetOcrResponseDto[];
    }>(`/assets/${encodeURIComponent(id)}/ocr`, {
        ...opts
    }));
}
|
||||||
/**
|
/**
|
||||||
* This endpoint requires the `asset.download` permission.
|
* This endpoint requires the `asset.download` permission.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import {
|
||||||
UpdateAssetDto,
|
UpdateAssetDto,
|
||||||
} from 'src/dtos/asset.dto';
|
} from 'src/dtos/asset.dto';
|
||||||
import { AuthDto } from 'src/dtos/auth.dto';
|
import { AuthDto } from 'src/dtos/auth.dto';
|
||||||
|
import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
|
||||||
import { Permission, RouteKey } from 'src/enum';
|
import { Permission, RouteKey } from 'src/enum';
|
||||||
import { Auth, Authenticated } from 'src/middleware/auth.guard';
|
import { Auth, Authenticated } from 'src/middleware/auth.guard';
|
||||||
import { AssetService } from 'src/services/asset.service';
|
import { AssetService } from 'src/services/asset.service';
|
||||||
|
|
@ -95,6 +96,12 @@ export class AssetController {
|
||||||
return this.service.getMetadata(auth, id);
|
return this.service.getMetadata(auth, id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GET :id/ocr — returns the OCR entries for one asset.
// Guarded by the AssetRead permission; the `id` route parameter is bound
// through UUIDParamDto and all work is delegated to the asset service.
@Get(':id/ocr')
@Authenticated({ permission: Permission.AssetRead })
getAssetOcr(@Auth() auth: AuthDto, @Param() { id }: UUIDParamDto): Promise<AssetOcrResponseDto[]> {
  return this.service.getOcr(auth, id);
}
|
||||||
|
|
||||||
@Put(':id/metadata')
|
@Put(':id/metadata')
|
||||||
@Authenticated({ permission: Permission.AssetUpdate })
|
@Authenticated({ permission: Permission.AssetUpdate })
|
||||||
updateAssetMetadata(
|
updateAssetMetadata(
|
||||||
|
|
|
||||||
42
server/src/dtos/ocr.dto.ts
Normal file
42
server/src/dtos/ocr.dto.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
import { ApiProperty } from '@nestjs/swagger';
|
||||||
|
|
||||||
|
// Response shape for a single OCR entry of an asset.
// Each entry carries a four-corner bounding box (x1..x4 / y1..y4, documented
// as normalized to the 0-1 range), two confidence scores and the recognized text.
export class AssetOcrResponseDto {
  // Identifier of the OCR entry itself.
  @ApiProperty({ type: 'string', format: 'uuid' })
  id!: string;

  // Identifier of the asset the entry belongs to.
  @ApiProperty({ type: 'string', format: 'uuid' })
  assetId!: string;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 1 (0-1)' })
  x1!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 1 (0-1)' })
  y1!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 2 (0-1)' })
  x2!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 2 (0-1)' })
  y2!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 3 (0-1)' })
  x3!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 3 (0-1)' })
  y3!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 4 (0-1)' })
  x4!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 4 (0-1)' })
  y4!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text detection box' })
  boxScore!: number;

  @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text recognition' })
  textScore!: number;

  @ApiProperty({ type: 'string', description: 'Recognized text' })
  text!: string;
}
|
||||||
|
|
@ -700,6 +700,42 @@ describe(AssetService.name, () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Unit tests for AssetService.getOcr: access control first, then the
// pass-through of repository results (populated and empty).
describe('getOcr', () => {
  it('should require asset read permission', async () => {
    // An empty set from the owner-access check means no requested id is accessible.
    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set());

    await expect(sut.getOcr(authStub.admin, 'asset-1')).rejects.toBeInstanceOf(BadRequestException);

    // The repository must not be queried once access has been denied.
    expect(mocks.ocr.getByAssetId).not.toHaveBeenCalled();
  });

  it('should return OCR data for an asset', async () => {
    const ocr1 = factory.assetOcr({ text: 'Hello World' });
    const ocr2 = factory.assetOcr({ text: 'Test Image' });

    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
    mocks.ocr.getByAssetId.mockResolvedValue([ocr1, ocr2]);

    // The service is expected to hand back the repository rows unchanged.
    await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([ocr1, ocr2]);

    expect(mocks.access.asset.checkOwnerAccess).toHaveBeenCalledWith(
      authStub.admin.user.id,
      new Set(['asset-1']),
      undefined,
    );
    expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1');
  });

  it('should return empty array when no OCR data exists', async () => {
    mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
    mocks.ocr.getByAssetId.mockResolvedValue([]);

    await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([]);

    expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1');
  });
});
|
||||||
|
|
||||||
describe('run', () => {
|
describe('run', () => {
|
||||||
it('should run the refresh faces job', async () => {
|
it('should run the refresh faces job', async () => {
|
||||||
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
|
mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1']));
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import {
|
||||||
mapStats,
|
mapStats,
|
||||||
} from 'src/dtos/asset.dto';
|
} from 'src/dtos/asset.dto';
|
||||||
import { AuthDto } from 'src/dtos/auth.dto';
|
import { AuthDto } from 'src/dtos/auth.dto';
|
||||||
|
import { AssetOcrResponseDto } from 'src/dtos/ocr.dto';
|
||||||
import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum';
|
import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum';
|
||||||
import { BaseService } from 'src/services/base.service';
|
import { BaseService } from 'src/services/base.service';
|
||||||
import { ISidecarWriteJob, JobItem, JobOf } from 'src/types';
|
import { ISidecarWriteJob, JobItem, JobOf } from 'src/types';
|
||||||
|
|
@ -289,6 +290,11 @@ export class AssetService extends BaseService {
|
||||||
return this.assetRepository.getMetadata(id);
|
return this.assetRepository.getMetadata(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the OCR entries stored for a single asset.
 *
 * Access is verified first with the `AssetRead` permission; the repository is
 * only queried after `requireAccess` resolves.
 *
 * @param auth Authentication context of the caller.
 * @param id   Identifier of the asset whose OCR entries are requested.
 * @returns Whatever `ocrRepository.getByAssetId` yields for `id`.
 */
async getOcr(auth: AuthDto, id: string): Promise<AssetOcrResponseDto[]> {
  await this.requireAccess({ auth, permission: Permission.AssetRead, ids: [id] });
  return this.ocrRepository.getByAssetId(id);
}
|
||||||
|
|
||||||
async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise<AssetMetadataResponseDto[]> {
|
async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise<AssetMetadataResponseDto[]> {
|
||||||
await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] });
|
await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] });
|
||||||
return this.assetRepository.upsertMetadata(id, dto.items);
|
return this.assetRepository.upsertMetadata(id, dto.items);
|
||||||
|
|
|
||||||
|
|
@ -309,10 +309,44 @@ const assetSidecarWriteFactory = (asset: Partial<SidecarWriteAsset> = {}) => ({
|
||||||
...asset,
|
...asset,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Builds an OCR entry fixture for tests.
 *
 * Defaults: fresh ids from `newUuid()`, an axis-aligned box with corners
 * (0.1, 0.2), (0.3, 0.2), (0.3, 0.4), (0.1, 0.4), scores 0.95 / 0.92 and the
 * text 'Sample Text'. Any field supplied in `ocr` overrides its default
 * because the overrides are spread last.
 *
 * @param ocr Optional per-field overrides.
 */
const assetOcrFactory = (
  ocr: {
    id?: string;
    assetId?: string;
    x1?: number;
    y1?: number;
    x2?: number;
    y2?: number;
    x3?: number;
    y3?: number;
    x4?: number;
    y4?: number;
    boxScore?: number;
    textScore?: number;
    text?: string;
  } = {},
) => ({
  id: newUuid(),
  assetId: newUuid(),
  x1: 0.1,
  y1: 0.2,
  x2: 0.3,
  y2: 0.2,
  x3: 0.3,
  y3: 0.4,
  x4: 0.1,
  y4: 0.4,
  boxScore: 0.95,
  textScore: 0.92,
  text: 'Sample Text',
  ...ocr,
});
|
||||||
|
|
||||||
export const factory = {
|
export const factory = {
|
||||||
activity: activityFactory,
|
activity: activityFactory,
|
||||||
apiKey: apiKeyFactory,
|
apiKey: apiKeyFactory,
|
||||||
asset: assetFactory,
|
asset: assetFactory,
|
||||||
|
assetOcr: assetOcrFactory,
|
||||||
auth: authFactory,
|
auth: authFactory,
|
||||||
authApiKey: authApiKeyFactory,
|
authApiKey: authApiKeyFactory,
|
||||||
authUser: authUserFactory,
|
authUser: authUserFactory,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue