diff --git a/packages/ai/integration/chat.test.ts b/packages/ai/integration/chat.test.ts
index 6af0e7a9af9..b6772a38fb1 100644
--- a/packages/ai/integration/chat.test.ts
+++ b/packages/ai/integration/chat.test.ts
@@ -76,56 +76,81 @@ describe('Chat Session', () => {
         'What is the capital of France?'
       );
       const response1 = result1.response;
-      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      const result2 = await chat.sendMessage('And what about Italy?');
+      const response2 = result2.response;
+      const history = await chat.getHistory();
 
-      let history = await chat.getHistory();
-      expect(history.length).to.equal(2);
+      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      expect(response1.usageMetadata).to.not.be.null;
+      expect(response2.text().trim().toLowerCase()).to.include('rome');
+      expect(response2.usageMetadata).to.not.be.null;
+      expect(history.length).to.equal(4);
       expect(history[0].role).to.equal('user');
       expect(history[0].parts[0].text).to.equal(
         'What is the capital of France?'
       );
       expect(history[1].role).to.equal('model');
       expect(history[1].parts[0].text?.toLowerCase()).to.include('paris');
-
-      expect(response1.usageMetadata).to.not.be.null;
-      // Token counts can vary slightly in chat context
-      expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
-        15, // "What is the capital of France?" + system instruction
-        TOKEN_COUNT_DELTA + 2 // More variance for chat context
-      );
-      expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8, // "Paris"
-        TOKEN_COUNT_DELTA
-      );
-      expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-        23, // "What is the capital of France?" + system instruction + "Paris"
-        TOKEN_COUNT_DELTA + 3 // More variance for chat context
-      );
-
-      const result2 = await chat.sendMessage('And what about Italy?');
-      const response2 = result2.response;
-      expect(response2.text().trim().toLowerCase()).to.include('rome');
-
-      history = await chat.getHistory();
-      expect(history.length).to.equal(4);
       expect(history[2].role).to.equal('user');
       expect(history[2].parts[0].text).to.equal('And what about Italy?');
       expect(history[3].role).to.equal('model');
       expect(history[3].parts[0].text?.toLowerCase()).to.include('rome');
 
-      expect(response2.usageMetadata).to.not.be.null;
-      expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
-        28, // History + "And what about Italy?" + system instruction
-        TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
-      );
-      expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
-        36,
-        TOKEN_COUNT_DELTA
-      );
+      if (model.model.includes('gemini-2.5-flash')) {
+        // Token counts can vary slightly in chat context
+        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
+          17, // "What is the capital of France?" + system instruction
+          TOKEN_COUNT_DELTA + 2 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8, // "Paris"
+          TOKEN_COUNT_DELTA
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          49, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+ system instruction + "Paris" + TOKEN_COUNT_DELTA + 3 // More variance for chat context + ); + + expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo( + 32, // History + "And what about Italy?" + system instruction + TOKEN_COUNT_DELTA + 5 // More variance for chat context with history + ); + expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo( + 8, + TOKEN_COUNT_DELTA + ); + expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo( + 68, + TOKEN_COUNT_DELTA + 2 + ); + } else if (model.model.includes('gemini-2.0-flash')) { + expect(response1.usageMetadata).to.not.be.null; + // Token counts can vary slightly in chat context + expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo( + 15, // "What is the capital of France?" + system instruction + TOKEN_COUNT_DELTA + 2 // More variance for chat context + ); + expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo( + 8, // "Paris" + TOKEN_COUNT_DELTA + ); + expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo( + 23, // "What is the capital of France?" + system instruction + "Paris" + TOKEN_COUNT_DELTA + 3 // More variance for chat context + ); + expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo( + 28, // History + "And what about Italy?" + system instruction + TOKEN_COUNT_DELTA + 5 // More variance for chat context with history + ); + expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo( + 8, + TOKEN_COUNT_DELTA + ); + expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo( + 36, + TOKEN_COUNT_DELTA + ); + } }); }); }); diff --git a/packages/ai/integration/constants.ts b/packages/ai/integration/constants.ts index 68aebf9eddc..1adfa4f47a0 100644 --- a/packages/ai/integration/constants.ts +++ b/packages/ai/integration/constants.ts @@ -52,7 +52,7 @@ const backendNames: Map = new Map([ [BackendType.VERTEX_AI, 'Vertex AI'] ]); -const modelNames: readonly string[] = ['gemini-2.0-flash']; +const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash']; /** * Array of test configurations that is iterated over to get full coverage diff --git a/packages/ai/integration/generate-content.test.ts b/packages/ai/integration/generate-content.test.ts index af877396cc8..22e4b0a30ac 100644 --- a/packages/ai/integration/generate-content.test.ts +++ b/packages/ai/integration/generate-content.test.ts @@ -81,36 +81,67 @@ describe('Generate Content', () => { expect(trimmedText).to.equal('Mountain View'); expect(response.usageMetadata).to.not.be.null; - expect(response.usageMetadata!.promptTokenCount).to.be.closeTo( - 21, - TOKEN_COUNT_DELTA - ); - expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo( - 4, - TOKEN_COUNT_DELTA - ); - expect(response.usageMetadata!.totalTokenCount).to.be.closeTo( - 25, - TOKEN_COUNT_DELTA * 2 - ); - expect(response.usageMetadata!.promptTokensDetails).to.not.be.null; - expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(1); - expect( - response.usageMetadata!.promptTokensDetails![0].modality - ).to.equal(Modality.TEXT); - expect( - response.usageMetadata!.promptTokensDetails![0].tokenCount - ).to.equal(21); - expect(response.usageMetadata!.candidatesTokensDetails).to.not.be.null; - expect( - response.usageMetadata!.candidatesTokensDetails!.length - ).to.equal(1); - expect( - response.usageMetadata!.candidatesTokensDetails![0].modality - ).to.equal(Modality.TEXT); - expect( - response.usageMetadata!.candidatesTokensDetails![0].tokenCount - ).to.be.closeTo(4, TOKEN_COUNT_DELTA); + + if 
diff --git a/packages/ai/integration/generate-content.test.ts b/packages/ai/integration/generate-content.test.ts
index af877396cc8..22e4b0a30ac 100644
--- a/packages/ai/integration/generate-content.test.ts
+++ b/packages/ai/integration/generate-content.test.ts
@@ -81,36 +81,67 @@ describe('Generate Content', () => {
       expect(trimmedText).to.equal('Mountain View');
 
       expect(response.usageMetadata).to.not.be.null;
-      expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
-        21,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        4,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
-        25,
-        TOKEN_COUNT_DELTA * 2
-      );
-      expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
-      expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(1);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].tokenCount
-      ).to.equal(21);
-      expect(response.usageMetadata!.candidatesTokensDetails).to.not.be.null;
-      expect(
-        response.usageMetadata!.candidatesTokensDetails!.length
-      ).to.equal(1);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].tokenCount
-      ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+
+      if (model.model.includes('gemini-2.5-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          22,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          2,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          55,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.be.closeTo(22, TOKEN_COUNT_DELTA);
+
+        // candidatesTokensDetails comes back about half the time, so we don't test it here.
+      } else if (model.model.includes('gemini-2.0-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          21,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          4,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          25,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.equal(21);
+        expect(response.usageMetadata!.candidatesTokensDetails).to.not.be
+          .null;
+        expect(
+          response.usageMetadata!.candidatesTokensDetails!.length
+        ).to.equal(1);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].tokenCount
+        ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+      }
     });
 
     it('generateContentStream: text input, text output', async () => {
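
A note on the token-count assertions: chai's closeTo passes when |actual - expected| <= delta, so each expected count above is a center point rather than an exact value. For illustration (the real TOKEN_COUNT_DELTA is defined in constants.ts; the value 6 below is only an assumed placeholder):

    import { expect } from 'chai';

    // Assumed placeholder; see packages/ai/integration/constants.ts for the real value.
    const TOKEN_COUNT_DELTA = 6;

    expect(52).to.be.closeTo(49, TOKEN_COUNT_DELTA); // passes: |52 - 49| = 3 <= 6
    expect(60).to.be.closeTo(49, TOKEN_COUNT_DELTA); // fails:  |60 - 49| = 11 > 6

The chat assertions widen the delta (TOKEN_COUNT_DELTA + 2, + 3, + 5) where history and system instructions make prompt and total counts less stable, and the totalTokenCount checks use TOKEN_COUNT_DELTA * 2 since totals accumulate variance from both prompt and candidate counts.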