diff --git a/.changeset/chilled-tips-judge.md b/.changeset/chilled-tips-judge.md new file mode 100644 index 00000000000..15d58806a9e --- /dev/null +++ b/.changeset/chilled-tips-judge.md @@ -0,0 +1,6 @@ +--- +'@firebase/vertexai': minor +'firebase': minor +--- + +Added support for modality-based token count. diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md index 4521aaf2b70..8b1dd83f51a 100644 --- a/common/api-review/vertexai.api.md +++ b/common/api-review/vertexai.api.md @@ -92,6 +92,7 @@ export interface CountTokensRequest { // @public export interface CountTokensResponse { + promptTokensDetails?: ModalityTokenCount[]; totalBillableCharacters?: number; totalTokens: number; } @@ -532,6 +533,22 @@ export class IntegerSchema extends Schema { constructor(schemaParams?: SchemaParams); } +// @public +export enum Modality { + AUDIO = "AUDIO", + DOCUMENT = "DOCUMENT", + IMAGE = "IMAGE", + MODALITY_UNSPECIFIED = "MODALITY_UNSPECIFIED", + TEXT = "TEXT", + VIDEO = "VIDEO" +} + +// @public +export interface ModalityTokenCount { + modality: Modality; + tokenCount: number; +} + // @public export interface ModelParams extends BaseParams { // (undocumented) @@ -767,8 +784,12 @@ export interface UsageMetadata { // (undocumented) candidatesTokenCount: number; // (undocumented) + candidatesTokensDetails?: ModalityTokenCount[]; + // (undocumented) promptTokenCount: number; // (undocumented) + promptTokensDetails?: ModalityTokenCount[]; + // (undocumented) totalTokenCount: number; } diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml index d97377e3d3d..665222edb9d 100644 --- a/docs-devsite/_toc.yaml +++ b/docs-devsite/_toc.yaml @@ -556,6 +556,8 @@ toc: path: /docs/reference/js/vertexai.inlinedatapart.md - title: IntegerSchema path: /docs/reference/js/vertexai.integerschema.md + - title: ModalityTokenCount + path: /docs/reference/js/vertexai.modalitytokencount.md - title: ModelParams path: /docs/reference/js/vertexai.modelparams.md - title: NumberSchema diff --git a/docs-devsite/vertexai.counttokensresponse.md b/docs-devsite/vertexai.counttokensresponse.md index 2978f9a45bb..d67cc99fab2 100644 --- a/docs-devsite/vertexai.counttokensresponse.md +++ b/docs-devsite/vertexai.counttokensresponse.md @@ -22,9 +22,20 @@ export interface CountTokensResponse | Property | Type | Description | | --- | --- | --- | +| [promptTokensDetails](./vertexai.counttokensresponse.md#counttokensresponseprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | The breakdown, by modality, of how many tokens are consumed by the prompt. | | [totalBillableCharacters](./vertexai.counttokensresponse.md#counttokensresponsetotalbillablecharacters) | number | The total number of billable characters counted across all instances from the request. | | [totalTokens](./vertexai.counttokensresponse.md#counttokensresponsetotaltokens) | number | The total number of tokens counted across all instances from the request. | +## CountTokensResponse.promptTokensDetails + +The breakdown, by modality, of how many tokens are consumed by the prompt. + +Signature: + +```typescript +promptTokensDetails?: ModalityTokenCount[]; +``` + ## CountTokensResponse.totalBillableCharacters The total number of billable characters counted across all instances from the request. diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md index 1d284345c39..d174bef7bcf 100644 --- a/docs-devsite/vertexai.md +++ b/docs-devsite/vertexai.md @@ -55,6 +55,7 @@ The Vertex AI in Firebase Web SDK. | [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | (Public Preview) Aspect ratios for Imagen images.To specify an aspect ratio for generated images, set the aspectRatio property in your [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface).See the the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. | | [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | (Public Preview) A filter level controlling whether generation of images containing people or faces is allowed.See the personGeneration documentation for more details. | | [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | (Public Preview) A filter level controlling how aggressively to filter sensitive content.Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, violence, sexual, derogatory, and toxic). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. | +| [Modality](./vertexai.md#modality) | Content part modality. | | [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) | | [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that [VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class) can have. | @@ -97,6 +98,7 @@ The Vertex AI in Firebase Web SDK. | [ImagenModelParams](./vertexai.imagenmodelparams.md#imagenmodelparams_interface) | (Public Preview) Parameters for configuring an [ImagenModel](./vertexai.imagenmodel.md#imagenmodel_class). | | [ImagenSafetySettings](./vertexai.imagensafetysettings.md#imagensafetysettings_interface) | (Public Preview) Settings for controlling the aggressiveness of filtering out sensitive content.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details. | | [InlineDataPart](./vertexai.inlinedatapart.md#inlinedatapart_interface) | Content part interface if the part represents an image. | +| [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface) | Represents token counting info for a single modality. | | [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9). | | [ObjectSchemaInterface](./vertexai.objectschemainterface.md#objectschemainterface_interface) | Interface for [ObjectSchema](./vertexai.objectschema.md#objectschema_class) class. | | [PromptFeedback](./vertexai.promptfeedback.md#promptfeedback_interface) | If the prompt was blocked, this will be populated with blockReason and the relevant safetyRatings. | @@ -481,6 +483,27 @@ export declare enum ImagenSafetyFilterLevel | BLOCK\_NONE | "block_none" | (Public Preview) The least aggressive filtering level; blocks very few sensitive prompts and responses.Access to this feature is restricted and may require your case to be reviewed and approved by Cloud support. | | BLOCK\_ONLY\_HIGH | "block_only_high" | (Public Preview) Blocks few sensitive prompts and responses. | +## Modality + +Content part modality. + +Signature: + +```typescript +export declare enum Modality +``` + +## Enumeration Members + +| Member | Value | Description | +| --- | --- | --- | +| AUDIO | "AUDIO" | Audio. | +| DOCUMENT | "DOCUMENT" | Document (for example, PDF). | +| IMAGE | "IMAGE" | Image. | +| MODALITY\_UNSPECIFIED | "MODALITY_UNSPECIFIED" | Unspecified modality. | +| TEXT | "TEXT" | Plain text. | +| VIDEO | "VIDEO" | Video. | + ## SchemaType Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) diff --git a/docs-devsite/vertexai.modalitytokencount.md b/docs-devsite/vertexai.modalitytokencount.md new file mode 100644 index 00000000000..d710b51fba6 --- /dev/null +++ b/docs-devsite/vertexai.modalitytokencount.md @@ -0,0 +1,46 @@ +Project: /docs/reference/js/_project.yaml +Book: /docs/reference/_book.yaml +page_type: reference + +{% comment %} +DO NOT EDIT THIS FILE! +This is generated by the JS SDK team, and any local changes will be +overwritten. Changes should be made in the source code at +https://github.com/firebase/firebase-js-sdk +{% endcomment %} + +# ModalityTokenCount interface +Represents token counting info for a single modality. + +Signature: + +```typescript +export interface ModalityTokenCount +``` + +## Properties + +| Property | Type | Description | +| --- | --- | --- | +| [modality](./vertexai.modalitytokencount.md#modalitytokencountmodality) | [Modality](./vertexai.md#modality) | The modality associated with this token count. | +| [tokenCount](./vertexai.modalitytokencount.md#modalitytokencounttokencount) | number | The number of tokens counted. | + +## ModalityTokenCount.modality + +The modality associated with this token count. + +Signature: + +```typescript +modality: Modality; +``` + +## ModalityTokenCount.tokenCount + +The number of tokens counted. + +Signature: + +```typescript +tokenCount: number; +``` diff --git a/docs-devsite/vertexai.usagemetadata.md b/docs-devsite/vertexai.usagemetadata.md index d254f34335f..5f886dd29f2 100644 --- a/docs-devsite/vertexai.usagemetadata.md +++ b/docs-devsite/vertexai.usagemetadata.md @@ -23,7 +23,9 @@ export interface UsageMetadata | Property | Type | Description | | --- | --- | --- | | [candidatesTokenCount](./vertexai.usagemetadata.md#usagemetadatacandidatestokencount) | number | | +| [candidatesTokensDetails](./vertexai.usagemetadata.md#usagemetadatacandidatestokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | | | [promptTokenCount](./vertexai.usagemetadata.md#usagemetadataprompttokencount) | number | | +| [promptTokensDetails](./vertexai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | | | [totalTokenCount](./vertexai.usagemetadata.md#usagemetadatatotaltokencount) | number | | ## UsageMetadata.candidatesTokenCount @@ -34,6 +36,14 @@ export interface UsageMetadata candidatesTokenCount: number; ``` +## UsageMetadata.candidatesTokensDetails + +Signature: + +```typescript +candidatesTokensDetails?: ModalityTokenCount[]; +``` + ## UsageMetadata.promptTokenCount Signature: @@ -42,6 +52,14 @@ candidatesTokenCount: number; promptTokenCount: number; ``` +## UsageMetadata.promptTokensDetails + +Signature: + +```typescript +promptTokensDetails?: ModalityTokenCount[]; +``` + ## UsageMetadata.totalTokenCount Signature: diff --git a/packages/vertexai/src/methods/count-tokens.test.ts b/packages/vertexai/src/methods/count-tokens.test.ts index fd4b99e1e00..2032e884fb4 100644 --- a/packages/vertexai/src/methods/count-tokens.test.ts +++ b/packages/vertexai/src/methods/count-tokens.test.ts @@ -66,6 +66,33 @@ describe('countTokens()', () => { undefined ); }); + it('total tokens with modality details', async () => { + const mockResponse = getMockResponse( + 'unary-success-detailed-token-response.json' + ); + const makeRequestStub = stub(request, 'makeRequest').resolves( + mockResponse as Response + ); + const result = await countTokens( + fakeApiSettings, + 'model', + fakeRequestParams + ); + expect(result.totalTokens).to.equal(1837); + expect(result.totalBillableCharacters).to.equal(117); + expect(result.promptTokensDetails?.[0].modality).to.equal('IMAGE'); + expect(result.promptTokensDetails?.[0].tokenCount).to.equal(1806); + expect(makeRequestStub).to.be.calledWith( + 'model', + Task.COUNT_TOKENS, + fakeApiSettings, + false, + match((value: string) => { + return value.includes('contents'); + }), + undefined + ); + }); it('total tokens no billable characters', async () => { const mockResponse = getMockResponse( 'unary-success-no-billable-characters.json' diff --git a/packages/vertexai/src/methods/generate-content.test.ts b/packages/vertexai/src/methods/generate-content.test.ts index c5a1d9e1e91..001fe12c9c8 100644 --- a/packages/vertexai/src/methods/generate-content.test.ts +++ b/packages/vertexai/src/methods/generate-content.test.ts @@ -102,6 +102,40 @@ describe('generateContent()', () => { match.any ); }); + it('long response with token details', async () => { + const mockResponse = getMockResponse( + 'unary-success-basic-response-long-usage-metadata.json' + ); + const makeRequestStub = stub(request, 'makeRequest').resolves( + mockResponse as Response + ); + const result = await generateContent( + fakeApiSettings, + 'model', + fakeRequestParams + ); + expect(result.response.usageMetadata?.totalTokenCount).to.equal(1913); + expect(result.response.usageMetadata?.candidatesTokenCount).to.equal(76); + expect( + result.response.usageMetadata?.promptTokensDetails?.[0].modality + ).to.equal('IMAGE'); + expect( + result.response.usageMetadata?.promptTokensDetails?.[0].tokenCount + ).to.equal(1806); + expect( + result.response.usageMetadata?.candidatesTokensDetails?.[0].modality + ).to.equal('TEXT'); + expect( + result.response.usageMetadata?.candidatesTokensDetails?.[0].tokenCount + ).to.equal(76); + expect(makeRequestStub).to.be.calledWith( + 'model', + Task.GENERATE_CONTENT, + fakeApiSettings, + false, + match.any + ); + }); it('citations', async () => { const mockResponse = getMockResponse('unary-success-citations.json'); const makeRequestStub = stub(request, 'makeRequest').resolves( diff --git a/packages/vertexai/src/types/enums.ts b/packages/vertexai/src/types/enums.ts index 3e66bacc612..4a7d95c660c 100644 --- a/packages/vertexai/src/types/enums.ts +++ b/packages/vertexai/src/types/enums.ts @@ -137,3 +137,34 @@ export enum FunctionCallingMode { // not passing any function declarations. NONE = 'NONE' } + +/** + * Content part modality. + * @public + */ +export enum Modality { + /** + * Unspecified modality. + */ + MODALITY_UNSPECIFIED = 'MODALITY_UNSPECIFIED', + /** + * Plain text. + */ + TEXT = 'TEXT', + /** + * Image. + */ + IMAGE = 'IMAGE', + /** + * Video. + */ + VIDEO = 'VIDEO', + /** + * Audio. + */ + AUDIO = 'AUDIO', + /** + * Document (for example, PDF). + */ + DOCUMENT = 'DOCUMENT' +} diff --git a/packages/vertexai/src/types/responses.ts b/packages/vertexai/src/types/responses.ts index 83cd4366f12..5685ed68ad6 100644 --- a/packages/vertexai/src/types/responses.ts +++ b/packages/vertexai/src/types/responses.ts @@ -21,7 +21,8 @@ import { FinishReason, HarmCategory, HarmProbability, - HarmSeverity + HarmSeverity, + Modality } from './enums'; /** @@ -83,6 +84,20 @@ export interface UsageMetadata { promptTokenCount: number; candidatesTokenCount: number; totalTokenCount: number; + promptTokensDetails?: ModalityTokenCount[]; + candidatesTokensDetails?: ModalityTokenCount[]; +} + +/** + * Represents token counting info for a single modality. + * + * @public + */ +export interface ModalityTokenCount { + /** The modality associated with this token count. */ + modality: Modality; + /** The number of tokens counted. */ + tokenCount: number; } /** @@ -213,4 +228,8 @@ export interface CountTokensResponse { * from the request. */ totalBillableCharacters?: number; + /** + * The breakdown, by modality, of how many tokens are consumed by the prompt. + */ + promptTokensDetails?: ModalityTokenCount[]; }