diff --git a/.changeset/chilled-tips-judge.md b/.changeset/chilled-tips-judge.md
new file mode 100644
index 00000000000..15d58806a9e
--- /dev/null
+++ b/.changeset/chilled-tips-judge.md
@@ -0,0 +1,6 @@
+---
+'@firebase/vertexai': minor
+'firebase': minor
+---
+
+Added support for per-modality token counts in `UsageMetadata` and `CountTokensResponse`.
diff --git a/common/api-review/vertexai.api.md b/common/api-review/vertexai.api.md
index 4521aaf2b70..8b1dd83f51a 100644
--- a/common/api-review/vertexai.api.md
+++ b/common/api-review/vertexai.api.md
@@ -92,6 +92,7 @@ export interface CountTokensRequest {
// @public
export interface CountTokensResponse {
+ promptTokensDetails?: ModalityTokenCount[];
totalBillableCharacters?: number;
totalTokens: number;
}
@@ -532,6 +533,22 @@ export class IntegerSchema extends Schema {
constructor(schemaParams?: SchemaParams);
}
+// @public
+export enum Modality {
+ AUDIO = "AUDIO",
+ DOCUMENT = "DOCUMENT",
+ IMAGE = "IMAGE",
+ MODALITY_UNSPECIFIED = "MODALITY_UNSPECIFIED",
+ TEXT = "TEXT",
+ VIDEO = "VIDEO"
+}
+
+// @public
+export interface ModalityTokenCount {
+ modality: Modality;
+ tokenCount: number;
+}
+
// @public
export interface ModelParams extends BaseParams {
// (undocumented)
@@ -767,8 +784,12 @@ export interface UsageMetadata {
// (undocumented)
candidatesTokenCount: number;
// (undocumented)
+ candidatesTokensDetails?: ModalityTokenCount[];
+ // (undocumented)
promptTokenCount: number;
// (undocumented)
+ promptTokensDetails?: ModalityTokenCount[];
+ // (undocumented)
totalTokenCount: number;
}
diff --git a/docs-devsite/_toc.yaml b/docs-devsite/_toc.yaml
index d97377e3d3d..665222edb9d 100644
--- a/docs-devsite/_toc.yaml
+++ b/docs-devsite/_toc.yaml
@@ -556,6 +556,8 @@ toc:
path: /docs/reference/js/vertexai.inlinedatapart.md
- title: IntegerSchema
path: /docs/reference/js/vertexai.integerschema.md
+ - title: ModalityTokenCount
+ path: /docs/reference/js/vertexai.modalitytokencount.md
- title: ModelParams
path: /docs/reference/js/vertexai.modelparams.md
- title: NumberSchema
diff --git a/docs-devsite/vertexai.counttokensresponse.md b/docs-devsite/vertexai.counttokensresponse.md
index 2978f9a45bb..d67cc99fab2 100644
--- a/docs-devsite/vertexai.counttokensresponse.md
+++ b/docs-devsite/vertexai.counttokensresponse.md
@@ -22,9 +22,20 @@ export interface CountTokensResponse
| Property | Type | Description |
| --- | --- | --- |
+| [promptTokensDetails](./vertexai.counttokensresponse.md#counttokensresponseprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | The breakdown, by modality, of how many tokens are consumed by the prompt. |
| [totalBillableCharacters](./vertexai.counttokensresponse.md#counttokensresponsetotalbillablecharacters) | number | The total number of billable characters counted across all instances from the request. |
| [totalTokens](./vertexai.counttokensresponse.md#counttokensresponsetotaltokens) | number | The total number of tokens counted across all instances from the request. |
+## CountTokensResponse.promptTokensDetails
+
+The breakdown, by modality, of how many tokens are consumed by the prompt.
+
+Signature:
+
+```typescript
+promptTokensDetails?: ModalityTokenCount[];
+```
+
## CountTokensResponse.totalBillableCharacters
The total number of billable characters counted across all instances from the request.
diff --git a/docs-devsite/vertexai.md b/docs-devsite/vertexai.md
index 1d284345c39..d174bef7bcf 100644
--- a/docs-devsite/vertexai.md
+++ b/docs-devsite/vertexai.md
@@ -55,6 +55,7 @@ The Vertex AI in Firebase Web SDK.
| [ImagenAspectRatio](./vertexai.md#imagenaspectratio) | (Public Preview) Aspect ratios for Imagen images.To specify an aspect ratio for generated images, set the aspectRatio
property in your [ImagenGenerationConfig](./vertexai.imagengenerationconfig.md#imagengenerationconfig_interface)
.See the the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details and examples of the supported aspect ratios. |
| [ImagenPersonFilterLevel](./vertexai.md#imagenpersonfilterlevel) | (Public Preview) A filter level controlling whether generation of images containing people or faces is allowed.See the personGeneration
documentation for more details. |
| [ImagenSafetyFilterLevel](./vertexai.md#imagensafetyfilterlevel) | (Public Preview) A filter level controlling how aggressively to filter sensitive content.Text prompts provided as inputs and images (generated or uploaded) through Imagen on Vertex AI are assessed against a list of safety filters, which include 'harmful categories' (for example, violence
, sexual
, derogatory
, and toxic
). This filter level controls how aggressively to filter out potentially harmful content from responses. See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) and the [Responsible AI and usage guidelines](https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters) for more details. |
+| [Modality](./vertexai.md#modality) | Content part modality. |
| [SchemaType](./vertexai.md#schematype) | Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/) |
| [VertexAIErrorCode](./vertexai.md#vertexaierrorcode) | Standardized error codes that [VertexAIError](./vertexai.vertexaierror.md#vertexaierror_class)
can have. |
@@ -97,6 +98,7 @@ The Vertex AI in Firebase Web SDK.
| [ImagenModelParams](./vertexai.imagenmodelparams.md#imagenmodelparams_interface) | (Public Preview) Parameters for configuring an [ImagenModel](./vertexai.imagenmodel.md#imagenmodel_class)
. |
| [ImagenSafetySettings](./vertexai.imagensafetysettings.md#imagensafetysettings_interface) | (Public Preview) Settings for controlling the aggressiveness of filtering out sensitive content.See the [documentation](http://firebase.google.com/docs/vertex-ai/generate-images) for more details. |
| [InlineDataPart](./vertexai.inlinedatapart.md#inlinedatapart_interface) | Content part interface if the part represents an image. |
+| [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface) | Represents token counting info for a single modality. |
| [ModelParams](./vertexai.modelparams.md#modelparams_interface) | Params passed to [getGenerativeModel()](./vertexai.md#getgenerativemodel_e3037c9)
. |
| [ObjectSchemaInterface](./vertexai.objectschemainterface.md#objectschemainterface_interface) | Interface for [ObjectSchema](./vertexai.objectschema.md#objectschema_class)
class. |
| [PromptFeedback](./vertexai.promptfeedback.md#promptfeedback_interface) | If the prompt was blocked, this will be populated with blockReason
and the relevant safetyRatings
. |
@@ -481,6 +483,27 @@ export declare enum ImagenSafetyFilterLevel
| BLOCK\_NONE | "block_none"
| (Public Preview) The least aggressive filtering level; blocks very few sensitive prompts and responses.Access to this feature is restricted and may require your case to be reviewed and approved by Cloud support. |
| BLOCK\_ONLY\_HIGH | "block_only_high"
| (Public Preview) Blocks few sensitive prompts and responses. |
+## Modality
+
+Content part modality.
+
+Signature:
+
+```typescript
+export declare enum Modality
+```
+
+## Enumeration Members
+
+| Member | Value | Description |
+| --- | --- | --- |
+| AUDIO | "AUDIO"
| Audio. |
+| DOCUMENT | "DOCUMENT"
| Document (for example, PDF). |
+| IMAGE | "IMAGE"
| Image. |
+| MODALITY\_UNSPECIFIED | "MODALITY_UNSPECIFIED"
| Unspecified modality. |
+| TEXT | "TEXT"
| Plain text. |
+| VIDEO | "VIDEO"
| Video. |
+
## SchemaType
Contains the list of OpenAPI data types as defined by the [OpenAPI specification](https://swagger.io/docs/specification/data-models/data-types/)
diff --git a/docs-devsite/vertexai.modalitytokencount.md b/docs-devsite/vertexai.modalitytokencount.md
new file mode 100644
index 00000000000..d710b51fba6
--- /dev/null
+++ b/docs-devsite/vertexai.modalitytokencount.md
@@ -0,0 +1,46 @@
+Project: /docs/reference/js/_project.yaml
+Book: /docs/reference/_book.yaml
+page_type: reference
+
+{% comment %}
+DO NOT EDIT THIS FILE!
+This is generated by the JS SDK team, and any local changes will be
+overwritten. Changes should be made in the source code at
+https://github.com/firebase/firebase-js-sdk
+{% endcomment %}
+
+# ModalityTokenCount interface
+Represents token counting info for a single modality.
+
+Signature:
+
+```typescript
+export interface ModalityTokenCount
+```
+
+## Properties
+
+| Property | Type | Description |
+| --- | --- | --- |
+| [modality](./vertexai.modalitytokencount.md#modalitytokencountmodality) | [Modality](./vertexai.md#modality) | The modality associated with this token count. |
+| [tokenCount](./vertexai.modalitytokencount.md#modalitytokencounttokencount) | number | The number of tokens counted. |
+
+## ModalityTokenCount.modality
+
+The modality associated with this token count.
+
+Signature:
+
+```typescript
+modality: Modality;
+```
+
+## ModalityTokenCount.tokenCount
+
+The number of tokens counted.
+
+Signature:
+
+```typescript
+tokenCount: number;
+```
diff --git a/docs-devsite/vertexai.usagemetadata.md b/docs-devsite/vertexai.usagemetadata.md
index d254f34335f..5f886dd29f2 100644
--- a/docs-devsite/vertexai.usagemetadata.md
+++ b/docs-devsite/vertexai.usagemetadata.md
@@ -23,7 +23,9 @@ export interface UsageMetadata
| Property | Type | Description |
| --- | --- | --- |
| [candidatesTokenCount](./vertexai.usagemetadata.md#usagemetadatacandidatestokencount) | number | |
+| [candidatesTokensDetails](./vertexai.usagemetadata.md#usagemetadatacandidatestokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | |
| [promptTokenCount](./vertexai.usagemetadata.md#usagemetadataprompttokencount) | number | |
+| [promptTokensDetails](./vertexai.usagemetadata.md#usagemetadataprompttokensdetails) | [ModalityTokenCount](./vertexai.modalitytokencount.md#modalitytokencount_interface)\[\] | |
| [totalTokenCount](./vertexai.usagemetadata.md#usagemetadatatotaltokencount) | number | |
## UsageMetadata.candidatesTokenCount
@@ -34,6 +36,14 @@ export interface UsageMetadata
candidatesTokenCount: number;
```
+## UsageMetadata.candidatesTokensDetails
+
+Signature:
+
+```typescript
+candidatesTokensDetails?: ModalityTokenCount[];
+```
+
## UsageMetadata.promptTokenCount
Signature:
@@ -42,6 +52,14 @@ candidatesTokenCount: number;
promptTokenCount: number;
```
+## UsageMetadata.promptTokensDetails
+
+Signature:
+
+```typescript
+promptTokensDetails?: ModalityTokenCount[];
+```
+
## UsageMetadata.totalTokenCount
Signature:
diff --git a/packages/vertexai/src/methods/count-tokens.test.ts b/packages/vertexai/src/methods/count-tokens.test.ts
index fd4b99e1e00..2032e884fb4 100644
--- a/packages/vertexai/src/methods/count-tokens.test.ts
+++ b/packages/vertexai/src/methods/count-tokens.test.ts
@@ -66,6 +66,33 @@ describe('countTokens()', () => {
undefined
);
});
+ it('total tokens with modality details', async () => {
+ const mockResponse = getMockResponse(
+ 'unary-success-detailed-token-response.json'
+ );
+ const makeRequestStub = stub(request, 'makeRequest').resolves(
+ mockResponse as Response
+ );
+ const result = await countTokens(
+ fakeApiSettings,
+ 'model',
+ fakeRequestParams
+ );
+ expect(result.totalTokens).to.equal(1837);
+ expect(result.totalBillableCharacters).to.equal(117);
+ expect(result.promptTokensDetails?.[0].modality).to.equal('IMAGE');
+ expect(result.promptTokensDetails?.[0].tokenCount).to.equal(1806);
+ expect(makeRequestStub).to.be.calledWith(
+ 'model',
+ Task.COUNT_TOKENS,
+ fakeApiSettings,
+ false,
+ match((value: string) => {
+ return value.includes('contents');
+ }),
+ undefined
+ );
+ });
it('total tokens no billable characters', async () => {
const mockResponse = getMockResponse(
'unary-success-no-billable-characters.json'
diff --git a/packages/vertexai/src/methods/generate-content.test.ts b/packages/vertexai/src/methods/generate-content.test.ts
index c5a1d9e1e91..001fe12c9c8 100644
--- a/packages/vertexai/src/methods/generate-content.test.ts
+++ b/packages/vertexai/src/methods/generate-content.test.ts
@@ -102,6 +102,40 @@ describe('generateContent()', () => {
match.any
);
});
+ it('long response with token details', async () => {
+ const mockResponse = getMockResponse(
+ 'unary-success-basic-response-long-usage-metadata.json'
+ );
+ const makeRequestStub = stub(request, 'makeRequest').resolves(
+ mockResponse as Response
+ );
+ const result = await generateContent(
+ fakeApiSettings,
+ 'model',
+ fakeRequestParams
+ );
+ expect(result.response.usageMetadata?.totalTokenCount).to.equal(1913);
+ expect(result.response.usageMetadata?.candidatesTokenCount).to.equal(76);
+ expect(
+ result.response.usageMetadata?.promptTokensDetails?.[0].modality
+ ).to.equal('IMAGE');
+ expect(
+ result.response.usageMetadata?.promptTokensDetails?.[0].tokenCount
+ ).to.equal(1806);
+ expect(
+ result.response.usageMetadata?.candidatesTokensDetails?.[0].modality
+ ).to.equal('TEXT');
+ expect(
+ result.response.usageMetadata?.candidatesTokensDetails?.[0].tokenCount
+ ).to.equal(76);
+ expect(makeRequestStub).to.be.calledWith(
+ 'model',
+ Task.GENERATE_CONTENT,
+ fakeApiSettings,
+ false,
+ match.any
+ );
+ });
it('citations', async () => {
const mockResponse = getMockResponse('unary-success-citations.json');
const makeRequestStub = stub(request, 'makeRequest').resolves(
diff --git a/packages/vertexai/src/types/enums.ts b/packages/vertexai/src/types/enums.ts
index 3e66bacc612..4a7d95c660c 100644
--- a/packages/vertexai/src/types/enums.ts
+++ b/packages/vertexai/src/types/enums.ts
@@ -137,3 +137,34 @@ export enum FunctionCallingMode {
// not passing any function declarations.
NONE = 'NONE'
}
+
+/**
+ * Content part modality, used to label a {@link ModalityTokenCount}.
+ * @public
+ */
+export enum Modality {
+ /**
+ * Unspecified modality.
+ */
+ MODALITY_UNSPECIFIED = 'MODALITY_UNSPECIFIED',
+ /**
+ * Plain text.
+ */
+ TEXT = 'TEXT',
+ /**
+ * Image.
+ */
+ IMAGE = 'IMAGE',
+ /**
+ * Video.
+ */
+ VIDEO = 'VIDEO',
+ /**
+ * Audio.
+ */
+ AUDIO = 'AUDIO',
+ /**
+ * Document (for example, PDF).
+ */
+ DOCUMENT = 'DOCUMENT'
+}
diff --git a/packages/vertexai/src/types/responses.ts b/packages/vertexai/src/types/responses.ts
index 83cd4366f12..5685ed68ad6 100644
--- a/packages/vertexai/src/types/responses.ts
+++ b/packages/vertexai/src/types/responses.ts
@@ -21,7 +21,8 @@ import {
FinishReason,
HarmCategory,
HarmProbability,
- HarmSeverity
+ HarmSeverity,
+ Modality
} from './enums';
/**
@@ -83,6 +84,20 @@ export interface UsageMetadata {
promptTokenCount: number;
candidatesTokenCount: number;
totalTokenCount: number;
+ promptTokensDetails?: ModalityTokenCount[];
+ candidatesTokensDetails?: ModalityTokenCount[];
+}
+
+/**
+ * Represents token counting info for a single modality.
+ *
+ * @public
+ */
+export interface ModalityTokenCount {
+ /** The modality associated with this token count. */
+ modality: Modality;
+ /** The number of tokens counted for this modality. */
+ tokenCount: number;
}
/**
@@ -213,4 +228,8 @@ export interface CountTokensResponse {
* from the request.
*/
totalBillableCharacters?: number;
+ /**
+ * The breakdown, by modality, of how many tokens are consumed by the prompt.
+ */
+ promptTokensDetails?: ModalityTokenCount[];
}