count tokens tests

dlarocque · dlarocque · commit 472c452ac13d · 2025-05-16T14:45:19.000-04:00
diff --git a/packages/ai/integration/constants.ts b/packages/ai/integration/constants.ts
@@ -27,19 +27,17 @@ import { FIREBASE_CONFIG } from './firebase-config';
 
 const app = initializeApp(FIREBASE_CONFIG);
 
-export type ModelName = 'gemini-2.0-flash' | 'gemini-2.0-flash-exp';
-
 /**
  * Test config that all tests will be ran against.
  */
 export type TestConfig = Readonly<{
   ai: AI;
-  model: ModelName;
+  model: string;
   /** This will be used to output the test config at runtime */
   toString: () => string;
 }>;
 
-function formatConfigAsString(config: { ai: AI; model: ModelName }): string {
+function formatConfigAsString(config: { ai: AI; model: string }): string {
   return `${backendNames.get(config.ai.backend.backendType)} ${config.model}`;
 }
 
@@ -53,9 +51,9 @@ const backendNames: Map<BackendType, string> = new Map([
   [BackendType.VERTEX_AI, 'Vertex AI']
 ]);
 
-const modelNames: ReadonlyArray<ModelName> = [
+const modelNames: ReadonlyArray<string> = [
   'gemini-2.0-flash',
-  'gemini-2.0-flash-exp'
+  // 'gemini-2.0-flash-exp'
 ];
 
 export const testConfigs: ReadonlyArray<TestConfig> = backends.flatMap(backend => {
@@ -67,4 +65,9 @@ export const testConfigs: ReadonlyArray<TestConfig> = backends.flatMap(backend =
       toString: () => formatConfigAsString({ ai, model: modelName })
     }
   })
-})
+})
+
+export const TINY_IMG_BASE64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABAQMAAAAl21bKAAAAA1BMVEUAAACnej3aAAAAAXRSTlMAQObYZgAAAApJREFUCNdjYAAAAAIAAeIhvDMAAAAASUVORK5CYII=';
+export const IMAGE_MIME_TYPE = 'image/png';
+export const TINY_MP3_BASE64 = 'SUQzBAAAAAAAIlRTU0UAAAAOAAADTGF2ZjYxLjcuMTAwAAAAAAAAAAAAAAD/+0DAAAAAAAAAAAAAAAAAAAAAAABJbmZvAAAADwAAAAUAAAK+AGhoaGhoaGhoaGhoaGhoaGhoaGiOjo6Ojo6Ojo6Ojo6Ojo6Ojo6OjrS0tLS0tLS0tLS0tLS0tLS0tLS02tra2tra2tra2tra2tra2tra2tr//////////////////////////wAAAABMYXZjNjEuMTkAAAAAAAAAAAAAAAAkAwYAAAAAAAACvhC6DYoAAAAAAP/7EMQAA8AAAaQAAAAgAAA0gAAABExBTUUzLjEwMFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV//sQxCmDwAABpAAAACAAADSAAAAEVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVX/+xDEUwPAAAGkAAAAIAAANIAAAARVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVf/7EMR8g8AAAaQAAAAgAAA0gAAABFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV//sQxKYDwAABpAAAACAAADSAAAAEVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVU=';
+export const AUDIO_MIME_TYPE = 'audio/mpeg';
diff --git a/packages/ai/integration/count-tokens.test.ts b/packages/ai/integration/count-tokens.test.ts
@@ -16,10 +16,30 @@
  */
 
 import { expect } from 'chai';
-import { Content, GenerationConfig, HarmBlockMethod, HarmBlockThreshold, HarmCategory, Modality, SafetySetting, getAI, getGenerativeModel, getVertexAI } from '../src';
 import {
+  Content,
+  GenerationConfig,
+  HarmBlockMethod,
+  HarmBlockThreshold,
+  HarmCategory,
+  Modality,
+  SafetySetting,
+  getGenerativeModel,
+  Part,
+  CountTokensRequest,
+  Schema,
+  InlineDataPart,
+  FileDataPart
+} from '../src';
+import {
+  AUDIO_MIME_TYPE,
+  IMAGE_MIME_TYPE,
+  TINY_IMG_BASE64,
+  TINY_MP3_BASE64,
   testConfigs
 } from './constants';
+import { FIREBASE_CONFIG } from './firebase-config';
+
 
 describe('Count Tokens', () => {
   testConfigs.forEach(testConfig => {
@@ -77,30 +97,86 @@ describe('Count Tokens', () => {
         expect(response.promptTokensDetails![0].modality).to.equal(Modality.TEXT);
         expect(response.promptTokensDetails![0].tokenCount).to.equal(6);
       });
+
       it('image input', async () => {
+        const model = getGenerativeModel(testConfig.ai, { model: testConfig.model });
+        const imagePart: Part = {
+          inlineData: {
+            mimeType: IMAGE_MIME_TYPE,
+            data: TINY_IMG_BASE64
+          }
+        };
+        const response = await model.countTokens([imagePart]);
+
+        const expectedImageTokens = 258;
+        expect(response.totalTokens, 'totalTokens should have correct token count').to.equal(expectedImageTokens);
+        expect(response.totalBillableCharacters, 'totalBillableCharacters should be undefined').to.be.undefined; // Incorrect behavior
+        expect(response.promptTokensDetails!.length, 'promptTokensDetails should have one entry').to.equal(1);
+        expect(response.promptTokensDetails![0].modality, 'modality should be IMAGE').to.equal(Modality.IMAGE);
+        expect(response.promptTokensDetails![0].tokenCount, 'promptTokenDetails tokenCount should be correct').to.equal(expectedImageTokens);
+      });
 
-      })
       it('audio input', async () => {
+        const model = getGenerativeModel(testConfig.ai, { model: testConfig.model });
+        const audioPart: InlineDataPart = {
+          inlineData: {
+            mimeType: AUDIO_MIME_TYPE,
+            data: TINY_MP3_BASE64
+          }
+        };
+
+        const response = await model.countTokens([audioPart]);
+        // This may be different on Google AI
+        expect(response.totalTokens, 'totalTokens is expected to be undefined').to.be.undefined;
+        expect(response.totalBillableCharacters, 'totalBillableCharacters should be undefined').to.be.undefined; // Incorrect behavior
+        expect(response.promptTokensDetails!.length, 'promptTokensDetails should have one entry').to.equal(1);
+        expect(response.promptTokensDetails![0].modality, 'modality should be AUDIO').to.equal(Modality.AUDIO);
+        expect(response.promptTokensDetails![0].tokenCount, 'promptTokenDetails tokenCount is expected to be undefined').to.be.undefined;
+      });
 
-      })
       it('text, image, and audio input', async () => {
+        const model = getGenerativeModel(testConfig.ai, { model: testConfig.model });
+        const textPart: Part = { text: 'Describe these:' };
+        const imagePart: Part = { inlineData: { mimeType: IMAGE_MIME_TYPE, data: TINY_IMG_BASE64 } };
+        const audioPart: Part = { inlineData: { mimeType: AUDIO_MIME_TYPE, data: TINY_MP3_BASE64 } };
 
-      })
-      it('public storage reference', async () => {
+        const request: CountTokensRequest = {
+          contents: [{ role: 'user', parts: [textPart, imagePart, audioPart] }]
+        };
+        const response = await model.countTokens(request);
+
+        expect(response.totalTokens, 'totalTokens should have correct token count').to.equal(261);
+        expect(response.totalBillableCharacters, 'totalBillableCharacters should have correct count').to.equal('Describe these:'.length - 1); // For some reason it's the length-1
+
+        expect(response.promptTokensDetails!.length, 'promptTokensDetails should have three entries').to.equal(3);
 
-      })
-      it('private storage reference', async () => {
+        const textDetails = response.promptTokensDetails!.find(d => d.modality === Modality.TEXT);
+        const visionDetails = response.promptTokensDetails!.find(d => d.modality === Modality.IMAGE);
+        const audioDetails = response.promptTokensDetails!.find(d => d.modality === Modality.AUDIO);
 
-      })
-      it('schema', async () => {
+        expect(textDetails).to.deep.equal({ modality: Modality.TEXT, tokenCount: 3 });
+        expect(visionDetails).to.deep.equal({ modality: Modality.IMAGE, tokenCount: 258 });
+        expect(audioDetails).to.deep.equal({ modality: Modality.AUDIO }); // Incorrect behavior because there's no tokenCount
+      });
 
-      })
-      // TODO (dlarocque): Test countTokens() with the following:
-      // - inline data
-      // - public storage reference
-      // - private storage reference (testing auth integration)
-      // - count tokens
-      // - JSON schema
+      it('public storage reference', async () => {
+        const model = getGenerativeModel(testConfig.ai, { model: testConfig.model });
+        const filePart: FileDataPart = {
+          fileData: {
+            mimeType: IMAGE_MIME_TYPE,
+            fileUri: `gs://${FIREBASE_CONFIG.storageBucket}/images/tree.png`
+          }
+        };
+        const response = await model.countTokens([filePart]);
+
+        const expectedFileTokens = 258;
+        expect(response.totalTokens, 'totalTokens should have correct token count').to.equal(expectedFileTokens);
+        expect(response.totalBillableCharacters, 'totalBillableCharacters should be undefined').to.be.undefined;
+        expect(response.promptTokensDetails).to.not.be.null;
+        expect(response.promptTokensDetails!.length).to.equal(1);
+        expect(response.promptTokensDetails![0].modality).to.equal(Modality.IMAGE);
+        expect(response.promptTokensDetails![0].tokenCount).to.equal(expectedFileTokens);
+      });
     });
   })
 });