Skip to content

Commit 1bea79b

Browse files
authored
Merge pull request #17 from microsoft/main
[pull] main from microsoft:main
2 parents 3c60915 + 8ca758c commit 1bea79b

File tree

3 files changed

+118
-171
lines changed

3 files changed

+118
-171
lines changed

webapi/Plugins/Chat/ChatPlugin.cs

Lines changed: 83 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -107,89 +107,6 @@ public ChatPlugin(
107107
this._contentSafety = contentSafety;
108108
}
109109

110-
/// <summary>
111-
/// Extract user intent from the conversation history.
112-
/// </summary>
113-
/// <param name="kernelArguments">The KernelArguments.</param>
114-
/// <param name="cancellationToken">The cancellation token.</param>
115-
private async Task<string> ExtractUserIntentAsync(KernelArguments kernelArguments, CancellationToken cancellationToken = default)
116-
{
117-
var tokenLimit = this._promptOptions.CompletionTokenLimit;
118-
var historyTokenBudget =
119-
tokenLimit -
120-
this._promptOptions.ResponseTokenLimit -
121-
TokenUtils.TokenCount(string.Join("\n", new string[]
122-
{
123-
this._promptOptions.SystemDescription,
124-
this._promptOptions.SystemIntent,
125-
this._promptOptions.SystemIntentContinuation
126-
})
127-
);
128-
129-
// Clone the context to avoid modifying the original context variables.
130-
KernelArguments intentExtractionContext = new(kernelArguments);
131-
intentExtractionContext["tokenLimit"] = historyTokenBudget.ToString(new NumberFormatInfo());
132-
intentExtractionContext["knowledgeCutoff"] = this._promptOptions.KnowledgeCutoffDate;
133-
134-
var completionFunction = this._kernel.CreateFunctionFromPrompt(
135-
this._promptOptions.SystemIntentExtraction,
136-
this.CreateIntentCompletionSettings(),
137-
functionName: nameof(ChatPlugin),
138-
description: "Complete the prompt.");
139-
140-
var result = await completionFunction.InvokeAsync(
141-
this._kernel,
142-
intentExtractionContext,
143-
cancellationToken
144-
);
145-
146-
// Get token usage from ChatCompletion result and add to context
147-
TokenUtils.GetFunctionTokenUsage(result, intentExtractionContext, this._logger, "SystemIntentExtraction");
148-
149-
return $"User intent: {result}";
150-
}
151-
152-
/// <summary>
153-
/// Extract the list of participants from the conversation history.
154-
/// Note that only those who have spoken will be included.
155-
/// </summary>
156-
/// <param name="context">The KernelArguments.</param>
157-
/// <param name="cancellationToken">The cancellation token.</param>
158-
private async Task<string> ExtractAudienceAsync(KernelArguments context, CancellationToken cancellationToken = default)
159-
{
160-
var tokenLimit = this._promptOptions.CompletionTokenLimit;
161-
var historyTokenBudget =
162-
tokenLimit -
163-
this._promptOptions.ResponseTokenLimit -
164-
TokenUtils.TokenCount(string.Join("\n", new string[]
165-
{
166-
this._promptOptions.SystemAudience,
167-
this._promptOptions.SystemAudienceContinuation,
168-
})
169-
);
170-
171-
// Clone the context to avoid modifying the original context variables.
172-
KernelArguments audienceExtractionContext = new(context);
173-
audienceExtractionContext["tokenLimit"] = historyTokenBudget.ToString(new NumberFormatInfo());
174-
175-
var completionFunction = this._kernel.CreateFunctionFromPrompt(
176-
this._promptOptions.SystemAudienceExtraction,
177-
this.CreateIntentCompletionSettings(),
178-
functionName: nameof(ChatPlugin),
179-
description: "Complete the prompt.");
180-
181-
var result = await completionFunction.InvokeAsync(
182-
this._kernel,
183-
audienceExtractionContext,
184-
cancellationToken
185-
);
186-
187-
// Get token usage from ChatCompletion result and add to context
188-
TokenUtils.GetFunctionTokenUsage(result, context, this._logger, "SystemAudienceExtraction");
189-
190-
return $"List of participants: {result}";
191-
}
192-
193110
/// <summary>
194111
/// Method that wraps GetAllowedChatHistoryAsync to get allotted history messages as one string.
195112
/// GetAllowedChatHistoryAsync optionally updates a ChatHistory object with the allotted messages,
@@ -324,7 +241,7 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
324241
// Render system instruction components and create the meta-prompt template
325242
var systemInstructions = await AsyncUtils.SafeInvokeAsync(
326243
() => this.RenderSystemInstructions(chatId, chatContext, cancellationToken), nameof(RenderSystemInstructions));
327-
ChatHistory chatHistory = new(systemInstructions);
244+
ChatHistory metaPrompt = new(systemInstructions);
328245

329246
// Bypass audience extraction if Auth is disabled
330247
var audience = string.Empty;
@@ -334,41 +251,42 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
334251
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Extracting audience", cancellationToken);
335252
audience = await AsyncUtils.SafeInvokeAsync(
336253
() => this.GetAudienceAsync(chatContext, cancellationToken), nameof(GetAudienceAsync));
337-
chatHistory.AddSystemMessage(audience);
254+
metaPrompt.AddSystemMessage(audience);
338255
}
339256

340257
// Extract user intent from the conversation history.
341258
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Extracting user intent", cancellationToken);
342259
var userIntent = await AsyncUtils.SafeInvokeAsync(
343260
() => this.GetUserIntentAsync(chatContext, cancellationToken), nameof(GetUserIntentAsync));
344-
chatHistory.AddSystemMessage(userIntent);
261+
metaPrompt.AddSystemMessage(userIntent);
345262

346-
// Calculate the remaining token budget.
347-
var remainingTokenBudget = this.GetChatContextTokenLimit(chatHistory, userMessage.ToFormattedString());
263+
// Calculate max amount of tokens to use for memories
264+
int maxRequestTokenBudget = this.GetMaxRequestTokenBudget();
265+
// Calculate tokens used so far: system instructions, audience extraction and user intent
266+
int tokensUsed = TokenUtils.GetContextMessagesTokenCount(metaPrompt);
267+
int chatMemoryTokenBudget = maxRequestTokenBudget
268+
- tokensUsed
269+
- TokenUtils.GetContextMessageTokenCount(AuthorRole.User, userMessage.ToFormattedString());
270+
chatMemoryTokenBudget = (int)(chatMemoryTokenBudget * this._promptOptions.MemoriesResponseContextWeight);
348271

349272
// Query relevant semantic and document memories
350273
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Extracting semantic and document memories", cancellationToken);
351-
var chatMemoriesTokenLimit = (int)(remainingTokenBudget * this._promptOptions.MemoriesResponseContextWeight);
352-
(var memoryText, var citationMap) = await this._semanticMemoryRetriever.QueryMemoriesAsync(userIntent, chatId, chatMemoriesTokenLimit);
353-
274+
(var memoryText, var citationMap) = await this._semanticMemoryRetriever.QueryMemoriesAsync(userIntent, chatId, chatMemoryTokenBudget);
354275
if (!string.IsNullOrWhiteSpace(memoryText))
355276
{
356-
chatHistory.AddSystemMessage(memoryText);
277+
metaPrompt.AddSystemMessage(memoryText);
278+
tokensUsed += TokenUtils.GetContextMessageTokenCount(AuthorRole.System, memoryText);
357279
}
358280

359-
// Fill in the chat history with remaining token budget.
360-
string allowedChatHistory = string.Empty;
361-
var allowedChatHistoryTokenBudget = remainingTokenBudget - TokenUtils.GetContextMessageTokenCount(AuthorRole.System, memoryText);
362-
363-
// Append previous messages
281+
// Add as many chat history messages to meta-prompt as the token budget will allow
364282
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Extracting chat history", cancellationToken);
365-
allowedChatHistory = await this.GetAllowedChatHistoryAsync(chatId, allowedChatHistoryTokenBudget, chatHistory, cancellationToken);
283+
string allowedChatHistory = await this.GetAllowedChatHistoryAsync(chatId, maxRequestTokenBudget - tokensUsed, metaPrompt, cancellationToken);
366284

367-
// Calculate token usage of prompt template
368-
chatContext[TokenUtils.GetFunctionKey(this._logger, "SystemMetaPrompt")!] = TokenUtils.GetContextMessagesTokenCount(chatHistory).ToString(CultureInfo.CurrentCulture);
285+
// Store token usage of prompt template
286+
chatContext[TokenUtils.GetFunctionKey("SystemMetaPrompt")] = TokenUtils.GetContextMessagesTokenCount(metaPrompt).ToString(CultureInfo.CurrentCulture);
369287

370288
// Stream the response to the client
371-
var promptView = new BotResponsePrompt(systemInstructions, audience, userIntent, memoryText, allowedChatHistory, chatHistory);
289+
var promptView = new BotResponsePrompt(systemInstructions, audience, userIntent, memoryText, allowedChatHistory, metaPrompt);
372290

373291
return await this.HandleBotResponseAsync(chatId, userId, chatContext, promptView, citationMap.Values.AsEnumerable(), cancellationToken);
374292
}
@@ -429,7 +347,7 @@ await AsyncUtils.SafeInvokeAsync(
429347
cancellationToken), nameof(SemanticChatMemoryExtractor.ExtractSemanticChatMemoryAsync));
430348

431349
// Calculate total token usage for dependency functions and prompt template
432-
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Calculating token usage", cancellationToken);
350+
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Saving token usage", cancellationToken);
433351
chatMessage.TokenUsage = this.GetTokenUsages(chatContext, chatMessage.Content);
434352

435353
// Update the message on client and in chat history with final completion token usage
@@ -449,16 +367,38 @@ private async Task<string> GetAudienceAsync(KernelArguments context, Cancellatio
449367
{
450368
// Clone the context to avoid modifying the original context variables
451369
KernelArguments audienceContext = new(context);
452-
var audience = await this.ExtractAudienceAsync(audienceContext, cancellationToken);
370+
int historyTokenBudget =
371+
this._promptOptions.CompletionTokenLimit -
372+
this._promptOptions.ResponseTokenLimit -
373+
TokenUtils.TokenCount(string.Join("\n\n", new string[]
374+
{
375+
this._promptOptions.SystemAudience,
376+
this._promptOptions.SystemAudienceContinuation,
377+
})
378+
);
379+
380+
audienceContext["tokenLimit"] = historyTokenBudget.ToString(new NumberFormatInfo());
453381

454-
// Copy token usage into original chat context
455-
var functionKey = TokenUtils.GetFunctionKey(this._logger, "SystemAudienceExtraction")!;
456-
if (audienceContext.TryGetValue(functionKey, out object? tokenUsage))
382+
var completionFunction = this._kernel.CreateFunctionFromPrompt(
383+
this._promptOptions.SystemAudienceExtraction,
384+
this.CreateIntentCompletionSettings(),
385+
functionName: "SystemAudienceExtraction",
386+
description: "Extract audience");
387+
388+
var result = await completionFunction.InvokeAsync(this._kernel, audienceContext, cancellationToken);
389+
390+
// Get token usage from ChatCompletion result and add to original context
391+
string? tokenUsage = TokenUtils.GetFunctionTokenUsage(result, this._logger);
392+
if (tokenUsage is not null)
457393
{
458-
context[functionKey] = tokenUsage;
394+
context[TokenUtils.GetFunctionKey("SystemAudienceExtraction")] = tokenUsage;
395+
}
396+
else
397+
{
398+
this._logger.LogError("Unable to determine token usage for audienceExtraction");
459399
}
460400

461-
return audience;
401+
return $"List of participants: {result}";
462402
}
463403

464404
/// <summary>
@@ -470,16 +410,41 @@ private async Task<string> GetUserIntentAsync(KernelArguments context, Cancellat
470410
{
471411
// Clone the context to avoid modifying the original context variables
472412
KernelArguments intentContext = new(context);
473-
string userIntent = await this.ExtractUserIntentAsync(intentContext, cancellationToken);
474413

475-
// Copy token usage into original chat context
476-
var functionKey = TokenUtils.GetFunctionKey(this._logger, "SystemIntentExtraction")!;
477-
if (intentContext.TryGetValue(functionKey!, out object? tokenUsage))
414+
int tokenBudget =
415+
this._promptOptions.CompletionTokenLimit -
416+
this._promptOptions.ResponseTokenLimit -
417+
TokenUtils.TokenCount(string.Join("\n", new string[]
418+
{
419+
this._promptOptions.SystemPersona,
420+
this._promptOptions.SystemIntent,
421+
this._promptOptions.SystemIntentContinuation
422+
})
423+
);
424+
425+
intentContext["tokenLimit"] = tokenBudget.ToString(new NumberFormatInfo());
426+
intentContext["knowledgeCutoff"] = this._promptOptions.KnowledgeCutoffDate;
427+
428+
var completionFunction = this._kernel.CreateFunctionFromPrompt(
429+
this._promptOptions.SystemIntentExtraction,
430+
this.CreateIntentCompletionSettings(),
431+
functionName: "UserIntentExtraction",
432+
description: "Extract user intent");
433+
434+
var result = await completionFunction.InvokeAsync(this._kernel, intentContext, cancellationToken);
435+
436+
// Get token usage from ChatCompletion result and add to original context
437+
string? tokenUsage = TokenUtils.GetFunctionTokenUsage(result, this._logger);
438+
if (tokenUsage is not null)
478439
{
479-
context[functionKey!] = tokenUsage;
440+
context[TokenUtils.GetFunctionKey("SystemIntentExtraction")] = tokenUsage;
441+
}
442+
else
443+
{
444+
this._logger.LogError("Unable to determine token usage for userIntentExtraction");
480445
}
481446

482-
return userIntent;
447+
return $"User intent: {result}";
483448
}
484449

485450
/// <summary>
@@ -610,24 +575,18 @@ private OpenAIPromptExecutionSettings CreateIntentCompletionSettings()
610575
}
611576

612577
/// <summary>
613-
/// Calculate the remaining token budget for the chat response prompt.
614-
/// This is the token limit minus the token count of the user intent, audience, and the system commands.
578+
/// Calculate the maximum number of tokens that can be sent in a request
615579
/// </summary>
616-
/// <param name="promptTemplate">All current messages to use for chat completion</param>
617-
/// <param name="userIntent">The user message.</param>
618-
/// <returns>The remaining token limit.</returns>
619-
private int GetChatContextTokenLimit(ChatHistory promptTemplate, string userInput = "")
580+
private int GetMaxRequestTokenBudget()
620581
{
621582
// OpenAI inserts a message under the hood:
622583
// "content": "Assistant is a large language model.","role": "system"
623584
// This burns just under 20 tokens which need to be accounted for.
624585
const int ExtraOpenAiMessageTokens = 20;
625586

626-
return this._promptOptions.CompletionTokenLimit
587+
return this._promptOptions.CompletionTokenLimit // Total token limit
627588
- ExtraOpenAiMessageTokens
628-
- TokenUtils.GetContextMessagesTokenCount(promptTemplate)
629-
- TokenUtils.GetContextMessageTokenCount(AuthorRole.User, userInput) // User message has to be included in chat history allowance
630-
- this._promptOptions.ResponseTokenLimit;
589+
- this._promptOptions.ResponseTokenLimit; // Token count reserved for model to generate a response
631590
}
632591

633592
/// <summary>

webapi/Plugins/Chat/SemanticChatMemoryExtractor.cs

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,10 +91,19 @@ async Task<SemanticChatMemory> ExtractCognitiveMemoryAsync(string memoryType, st
9191
cancellationToken);
9292

9393
// Get token usage from ChatCompletion result and add to context
94-
// Since there are multiple memory types, total token usage is calculated by cumulating the token usage of each memory type.
95-
TokenUtils.GetFunctionTokenUsage(result, kernelArguments, logger, $"SystemCognitive_{memoryType}");
94+
string? tokenUsage = TokenUtils.GetFunctionTokenUsage(result, logger);
95+
if (tokenUsage is not null)
96+
{
97+
// Since there are multiple memory types, total token usage is calculated by accumulating the token usage of each memory type.
98+
kernelArguments[TokenUtils.GetFunctionKey($"SystemCognitive_{memoryType}")] = tokenUsage;
99+
}
100+
else
101+
{
102+
logger.LogError("Unable to determine token usage for {0}", $"SystemCognitive_{memoryType}");
103+
}
96104

97105
SemanticChatMemory memory = SemanticChatMemory.FromJson(result.ToString());
106+
98107
return memory;
99108
}
100109

0 commit comments

Comments
 (0)