fix: improve VS Code LM token usage reporting for context window updates #6115

Open · wants to merge 2 commits into base: main
6 changes: 6 additions & 0 deletions packages/types/src/__tests__/provider-settings.test.ts
@@ -46,6 +46,12 @@ describe("getApiProtocol", () => {
 		expect(getApiProtocol("litellm", "claude-instant")).toBe("openai")
 		expect(getApiProtocol("ollama", "claude-model")).toBe("openai")
 	})
+
+	it("should return 'openai' for vscode-lm provider", () => {
+		expect(getApiProtocol("vscode-lm")).toBe("openai")
+		expect(getApiProtocol("vscode-lm", "copilot-gpt-4")).toBe("openai")
+		expect(getApiProtocol("vscode-lm", "copilot-gpt-3.5")).toBe("openai")
+	})
 })

 describe("Edge cases", () => {
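For reference, the protocol mapping these new assertions pin down can be pictured roughly as follows. This is only an illustrative sketch, not the repository's actual `getApiProtocol` implementation; the `ApiProtocol` type alias and the single `anthropic` branch are assumptions made for the example.

```typescript
// Illustrative sketch only: the provider-to-protocol classification the test above exercises.
type ApiProtocol = "anthropic" | "openai"

function getApiProtocolSketch(provider?: string, _modelId?: string): ApiProtocol {
	// Assumption: only explicitly Anthropic-style providers use the "anthropic" protocol.
	if (provider === "anthropic") {
		return "anthropic"
	}
	// Everything else, including "vscode-lm" with Copilot model ids, is treated as OpenAI-style,
	// which matches the expectations added in this hunk.
	return "openai"
}

// getApiProtocolSketch("vscode-lm")                  -> "openai"
// getApiProtocolSketch("vscode-lm", "copilot-gpt-4") -> "openai"
```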
47 changes: 40 additions & 7 deletions src/api/providers/__tests__/vscode-lm.spec.ts
@@ -168,14 +168,19 @@ describe("VsCodeLmHandler", () => {
 				chunks.push(chunk)
 			}

-			expect(chunks).toHaveLength(2) // Text chunk + usage chunk
-			expect(chunks[0]).toEqual({
+			expect(chunks).toHaveLength(3) // Initial usage + text chunk + final usage chunk
+			expect(chunks[0]).toMatchObject({
+				type: "usage",
+				inputTokens: expect.any(Number),
+				outputTokens: 0,
+			})
+			expect(chunks[1]).toEqual({
 				type: "text",
 				text: responseText,
 			})
-			expect(chunks[1]).toMatchObject({
+			expect(chunks[2]).toMatchObject({
 				type: "usage",
-				inputTokens: expect.any(Number),
+				inputTokens: 0,
 				outputTokens: expect.any(Number),
 			})
 		})
@@ -216,8 +221,13 @@ describe("VsCodeLmHandler", () => {
 				chunks.push(chunk)
 			}

-			expect(chunks).toHaveLength(2) // Tool call chunk + usage chunk
-			expect(chunks[0]).toEqual({
+			expect(chunks).toHaveLength(3) // Initial usage + tool call chunk + final usage chunk
+			expect(chunks[0]).toMatchObject({
+				type: "usage",
+				inputTokens: expect.any(Number),
+				outputTokens: 0,
+			})
+			expect(chunks[1]).toEqual({
 				type: "text",
 				text: JSON.stringify({ type: "tool_call", ...toolCallData }),
 			})
@@ -234,7 +244,17 @@ describe("VsCodeLmHandler", () => {

 			mockLanguageModelChat.sendRequest.mockRejectedValueOnce(new Error("API Error"))

-			await expect(handler.createMessage(systemPrompt, messages).next()).rejects.toThrow("API Error")
+			const stream = handler.createMessage(systemPrompt, messages)
+			// First chunk should be the initial usage
+			const firstChunk = await stream.next()
+			expect(firstChunk.value).toMatchObject({
+				type: "usage",
+				inputTokens: expect.any(Number),
+				outputTokens: 0,
+			})
+
+			// The error should occur when trying to get the next chunk
+			await expect(stream.next()).rejects.toThrow("API Error")
 		})
 	})

@@ -262,6 +282,19 @@ describe("VsCodeLmHandler", () => {
 	})

 	describe("completePrompt", () => {
+		beforeEach(() => {
+			// Ensure we have a fresh mock for CancellationTokenSource
+			const mockCancellationTokenSource = {
+				token: {
+					isCancellationRequested: false,
+					onCancellationRequested: vi.fn(),
+				},
+				cancel: vi.fn(),
+				dispose: vi.fn(),
+			}
+			;(vscode.CancellationTokenSource as Mock).mockReturnValue(mockCancellationTokenSource)
+		})
+
 		it("should complete single prompt", async () => {
 			const mockModel = { ...mockLanguageModelChat }
 			;(vscode.lm.selectChatModels as Mock).mockResolvedValueOnce([mockModel])
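Read together, the updated expectations describe the chunk sequence a caller now sees from `createMessage`: an initial usage chunk with the prompt's input tokens, the content chunk(s), then a final usage chunk carrying the output-token total. Below is a minimal sketch of draining the stream under that contract; the `ApiStreamChunk` shape is inferred from the assertions, not imported from the repository.

```typescript
// Chunk shape inferred from the test assertions above (assumption, not the repo's exported type).
type ApiStreamChunk =
	| { type: "usage"; inputTokens: number; outputTokens: number; cacheWriteTokens?: number; cacheReadTokens?: number }
	| { type: "text"; text: string }

// Drain an async stream of chunks, as the tests do with `for await`.
async function drainStream(stream: AsyncIterable<ApiStreamChunk>): Promise<ApiStreamChunk[]> {
	const chunks: ApiStreamChunk[] = []
	for await (const chunk of stream) {
		chunks.push(chunk)
	}
	return chunks
}

// Expected order after this change:
//   chunks[0] = { type: "usage", inputTokens: <prompt tokens>, outputTokens: 0 }  // before the request
//   chunks[1] = { type: "text", text: "..." }                                     // model output
//   chunks[2] = { type: "usage", inputTokens: 0, outputTokens: <final total> }    // after the stream ends
```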
33 changes: 31 additions & 2 deletions src/api/providers/vscode-lm.ts
@@ -361,8 +361,20 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		// Calculate input tokens before starting the stream
 		const totalInputTokens: number = await this.calculateTotalInputTokens(systemPrompt, vsCodeLmMessages)

+		// Yield initial usage with input tokens (similar to Anthropic's message_start)
+		yield {
+			type: "usage",
+			inputTokens: totalInputTokens,
+			outputTokens: 0,
+			// VS Code LM doesn't provide cache token information, so we set them to 0
+			cacheWriteTokens: 0,
+			cacheReadTokens: 0,
+		}
+
 		// Accumulate the text and count at the end of the stream to reduce token counting overhead.
 		let accumulatedText: string = ""
+		let lastTokenCountUpdate: number = 0
+		const TOKEN_UPDATE_INTERVAL = 500 // Update token count every 500 characters

 		try {
 			// Create the response stream with minimal required options
@@ -393,6 +405,19 @@
 						type: "text",
 						text: chunk.value,
 					}
+
+					// Periodically yield token updates during streaming
+					if (accumulatedText.length - lastTokenCountUpdate > TOKEN_UPDATE_INTERVAL) {
+						const currentOutputTokens = await this.internalCountTokens(accumulatedText)
+						yield {
+							type: "usage",
+							inputTokens: 0,
+							outputTokens: currentOutputTokens,
+							cacheWriteTokens: 0,
+							cacheReadTokens: 0,
+						}
+						lastTokenCountUpdate = accumulatedText.length
+					}
 				} else if (chunk instanceof vscode.LanguageModelToolCallPart) {
 					try {
 						// Validate tool call parameters
Expand Down Expand Up @@ -448,10 +473,14 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
const totalOutputTokens: number = await this.internalCountTokens(accumulatedText)

// Report final usage after stream completion
// Note: We report the total tokens here, not incremental, as the UI expects the final total
yield {
type: "usage",
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
inputTokens: 0, // Already reported at the start
outputTokens: totalOutputTokens, // Report the final total
// VS Code LM doesn't provide cache token information, so we set them to 0
cacheWriteTokens: 0,
cacheReadTokens: 0,
}
} catch (error: unknown) {
this.ensureCleanState()
Expand Down
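On the consumer side, the new contract reads as follows: input tokens arrive once in the initial chunk, while the periodic and final usage chunks each carry a cumulative output-token count (the final one being the total), so a context-window tracker should keep the latest outputTokens value rather than summing the chunks. Below is a minimal sketch under that reading; the `UsageChunk` interface and the `updateContextWindow` callback are assumptions for illustration, not code from this PR or from Roo Code's UI.

```typescript
// Sketch of one way to aggregate the usage chunks emitted above (assumed consumer code).
interface UsageChunk {
	type: "usage"
	inputTokens: number
	outputTokens: number
	cacheWriteTokens?: number
	cacheReadTokens?: number
}

function createUsageTracker(updateContextWindow: (tokensUsed: number) => void) {
	let inputTokens = 0 // reported once, in the initial usage chunk
	let outputTokens = 0 // periodic/final chunks report cumulative totals, so keep the latest value

	return (chunk: UsageChunk) => {
		inputTokens += chunk.inputTokens // later chunks send 0 here, so only the initial report counts
		outputTokens = Math.max(outputTokens, chunk.outputTokens) // cumulative counts are non-decreasing
		updateContextWindow(inputTokens + outputTokens)
	}
}
```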