@@ -107,89 +107,6 @@ public ChatPlugin(
107
107
this . _contentSafety = contentSafety ;
108
108
}
109
109
110
- /// <summary>
111
- /// Extract user intent from the conversation history.
112
- /// </summary>
113
- /// <param name="kernelArguments">The KernelArguments.</param>
114
- /// <param name="cancellationToken">The cancellation token.</param>
115
- private async Task < string > ExtractUserIntentAsync ( KernelArguments kernelArguments , CancellationToken cancellationToken = default )
116
- {
117
- var tokenLimit = this . _promptOptions . CompletionTokenLimit ;
118
- var historyTokenBudget =
119
- tokenLimit -
120
- this . _promptOptions . ResponseTokenLimit -
121
- TokenUtils . TokenCount ( string . Join ( "\n " , new string [ ]
122
- {
123
- this . _promptOptions . SystemDescription ,
124
- this . _promptOptions . SystemIntent ,
125
- this . _promptOptions . SystemIntentContinuation
126
- } )
127
- ) ;
128
-
129
- // Clone the context to avoid modifying the original context variables.
130
- KernelArguments intentExtractionContext = new ( kernelArguments ) ;
131
- intentExtractionContext [ "tokenLimit" ] = historyTokenBudget . ToString ( new NumberFormatInfo ( ) ) ;
132
- intentExtractionContext [ "knowledgeCutoff" ] = this . _promptOptions . KnowledgeCutoffDate ;
133
-
134
- var completionFunction = this . _kernel . CreateFunctionFromPrompt (
135
- this . _promptOptions . SystemIntentExtraction ,
136
- this . CreateIntentCompletionSettings ( ) ,
137
- functionName : nameof ( ChatPlugin ) ,
138
- description : "Complete the prompt." ) ;
139
-
140
- var result = await completionFunction . InvokeAsync (
141
- this . _kernel ,
142
- intentExtractionContext ,
143
- cancellationToken
144
- ) ;
145
-
146
- // Get token usage from ChatCompletion result and add to context
147
- TokenUtils . GetFunctionTokenUsage ( result , intentExtractionContext , this . _logger , "SystemIntentExtraction" ) ;
148
-
149
- return $ "User intent: { result } ";
150
- }
151
-
152
- /// <summary>
153
- /// Extract the list of participants from the conversation history.
154
- /// Note that only those who have spoken will be included.
155
- /// </summary>
156
- /// <param name="context">The SKContext.</param>
157
- /// <param name="cancellationToken">The cancellation token.</param>
158
- private async Task < string > ExtractAudienceAsync ( KernelArguments context , CancellationToken cancellationToken = default )
159
- {
160
- var tokenLimit = this . _promptOptions . CompletionTokenLimit ;
161
- var historyTokenBudget =
162
- tokenLimit -
163
- this . _promptOptions . ResponseTokenLimit -
164
- TokenUtils . TokenCount ( string . Join ( "\n " , new string [ ]
165
- {
166
- this . _promptOptions . SystemAudience ,
167
- this . _promptOptions . SystemAudienceContinuation ,
168
- } )
169
- ) ;
170
-
171
- // Clone the context to avoid modifying the original context variables.
172
- KernelArguments audienceExtractionContext = new ( context ) ;
173
- audienceExtractionContext [ "tokenLimit" ] = historyTokenBudget . ToString ( new NumberFormatInfo ( ) ) ;
174
-
175
- var completionFunction = this . _kernel . CreateFunctionFromPrompt (
176
- this . _promptOptions . SystemAudienceExtraction ,
177
- this . CreateIntentCompletionSettings ( ) ,
178
- functionName : nameof ( ChatPlugin ) ,
179
- description : "Complete the prompt." ) ;
180
-
181
- var result = await completionFunction . InvokeAsync (
182
- this . _kernel ,
183
- audienceExtractionContext ,
184
- cancellationToken
185
- ) ;
186
-
187
- // Get token usage from ChatCompletion result and add to context
188
- TokenUtils . GetFunctionTokenUsage ( result , context , this . _logger , "SystemAudienceExtraction" ) ;
189
-
190
- return $ "List of participants: { result } ";
191
- }
192
-
193
110
/// <summary>
194
111
/// Method that wraps GetAllowedChatHistoryAsync to get allotted history messages as one string.
195
112
/// GetAllowedChatHistoryAsync optionally updates a ChatHistory object with the allotted messages,
@@ -324,7 +241,7 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
324
241
// Render system instruction components and create the meta-prompt template
325
242
var systemInstructions = await AsyncUtils . SafeInvokeAsync (
326
243
( ) => this . RenderSystemInstructions ( chatId , chatContext , cancellationToken ) , nameof ( RenderSystemInstructions ) ) ;
327
- ChatHistory chatHistory = new ( systemInstructions ) ;
244
+ ChatHistory metaPrompt = new ( systemInstructions ) ;
328
245
329
246
// Bypass audience extraction if Auth is disabled
330
247
var audience = string . Empty ;
@@ -334,41 +251,42 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
334
251
await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Extracting audience" , cancellationToken ) ;
335
252
audience = await AsyncUtils . SafeInvokeAsync (
336
253
( ) => this . GetAudienceAsync ( chatContext , cancellationToken ) , nameof ( GetAudienceAsync ) ) ;
337
- chatHistory . AddSystemMessage ( audience ) ;
254
+ metaPrompt . AddSystemMessage ( audience ) ;
338
255
}
339
256
340
257
// Extract user intent from the conversation history.
341
258
await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Extracting user intent" , cancellationToken ) ;
342
259
var userIntent = await AsyncUtils . SafeInvokeAsync (
343
260
( ) => this . GetUserIntentAsync ( chatContext , cancellationToken ) , nameof ( GetUserIntentAsync ) ) ;
344
- chatHistory . AddSystemMessage ( userIntent ) ;
261
+ metaPrompt . AddSystemMessage ( userIntent ) ;
345
262
346
- // Calculate the remaining token budget.
347
- var remainingTokenBudget = this . GetChatContextTokenLimit ( chatHistory , userMessage . ToFormattedString ( ) ) ;
263
+ // Calculate max amount of tokens to use for memories
264
+ int maxRequestTokenBudget = this . GetMaxRequestTokenBudget ( ) ;
265
+ // Calculate tokens used so far: system instructions, audience extraction and user intent
266
+ int tokensUsed = TokenUtils . GetContextMessagesTokenCount ( metaPrompt ) ;
267
+ int chatMemoryTokenBudget = maxRequestTokenBudget
268
+ - tokensUsed
269
+ - TokenUtils . GetContextMessageTokenCount ( AuthorRole . User , userMessage . ToFormattedString ( ) ) ;
270
+ chatMemoryTokenBudget = ( int ) ( chatMemoryTokenBudget * this . _promptOptions . MemoriesResponseContextWeight ) ;
348
271
349
272
// Query relevant semantic and document memories
350
273
await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Extracting semantic and document memories" , cancellationToken ) ;
351
- var chatMemoriesTokenLimit = ( int ) ( remainingTokenBudget * this . _promptOptions . MemoriesResponseContextWeight ) ;
352
- ( var memoryText , var citationMap ) = await this . _semanticMemoryRetriever . QueryMemoriesAsync ( userIntent , chatId , chatMemoriesTokenLimit ) ;
353
-
274
+ ( var memoryText , var citationMap ) = await this . _semanticMemoryRetriever . QueryMemoriesAsync ( userIntent , chatId , chatMemoryTokenBudget ) ;
354
275
if ( ! string . IsNullOrWhiteSpace ( memoryText ) )
355
276
{
356
- chatHistory . AddSystemMessage ( memoryText ) ;
277
+ metaPrompt . AddSystemMessage ( memoryText ) ;
278
+ tokensUsed += TokenUtils . GetContextMessageTokenCount ( AuthorRole . System , memoryText ) ;
357
279
}
358
280
359
- // Fill in the chat history with remaining token budget.
360
- string allowedChatHistory = string . Empty ;
361
- var allowedChatHistoryTokenBudget = remainingTokenBudget - TokenUtils . GetContextMessageTokenCount ( AuthorRole . System , memoryText ) ;
362
-
363
- // Append previous messages
281
+ // Add as many chat history messages to meta-prompt as the token budget will allow
364
282
await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Extracting chat history" , cancellationToken ) ;
365
- allowedChatHistory = await this . GetAllowedChatHistoryAsync ( chatId , allowedChatHistoryTokenBudget , chatHistory , cancellationToken ) ;
283
+ string allowedChatHistory = await this . GetAllowedChatHistoryAsync ( chatId , maxRequestTokenBudget - tokensUsed , metaPrompt , cancellationToken ) ;
366
284
367
- // Calculate token usage of prompt template
368
- chatContext [ TokenUtils . GetFunctionKey ( this . _logger , "SystemMetaPrompt" ) ! ] = TokenUtils . GetContextMessagesTokenCount ( chatHistory ) . ToString ( CultureInfo . CurrentCulture ) ;
285
+ // Store token usage of prompt template
286
+ chatContext [ TokenUtils . GetFunctionKey ( "SystemMetaPrompt" ) ] = TokenUtils . GetContextMessagesTokenCount ( metaPrompt ) . ToString ( CultureInfo . CurrentCulture ) ;
369
287
370
288
// Stream the response to the client
371
- var promptView = new BotResponsePrompt ( systemInstructions , audience , userIntent , memoryText , allowedChatHistory , chatHistory ) ;
289
+ var promptView = new BotResponsePrompt ( systemInstructions , audience , userIntent , memoryText , allowedChatHistory , metaPrompt ) ;
372
290
373
291
return await this . HandleBotResponseAsync ( chatId , userId , chatContext , promptView , citationMap . Values . AsEnumerable ( ) , cancellationToken ) ;
374
292
}
@@ -429,7 +347,7 @@ await AsyncUtils.SafeInvokeAsync(
429
347
cancellationToken ) , nameof ( SemanticChatMemoryExtractor . ExtractSemanticChatMemoryAsync ) ) ;
430
348
431
349
// Calculate total token usage for dependency functions and prompt template
432
- await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Calculating token usage" , cancellationToken ) ;
350
+ await this . UpdateBotResponseStatusOnClientAsync ( chatId , "Saving token usage" , cancellationToken ) ;
433
351
chatMessage . TokenUsage = this . GetTokenUsages ( chatContext , chatMessage . Content ) ;
434
352
435
353
// Update the message on client and in chat history with final completion token usage
@@ -449,16 +367,38 @@ private async Task<string> GetAudienceAsync(KernelArguments context, Cancellatio
449
367
{
    // Budget offered to the audience prompt for chat history: the completion limit,
    // minus the tokens reserved for the response, minus the token count of the
    // fixed audience prompt text.
    var completionLimit = this._promptOptions.CompletionTokenLimit;
    var responseReserve = this._promptOptions.ResponseTokenLimit;
    var fixedPromptTokens = TokenUtils.TokenCount(
        string.Join("\n\n", this._promptOptions.SystemAudience, this._promptOptions.SystemAudienceContinuation));
    int historyTokenBudget = completionLimit - responseReserve - fixedPromptTokens;

    // Work on a copy so the prompt variable set here does not end up in the caller's arguments.
    KernelArguments promptArguments = new(context)
    {
        ["tokenLimit"] = historyTokenBudget.ToString(NumberFormatInfo.InvariantInfo),
    };

    var audienceFunction = this._kernel.CreateFunctionFromPrompt(
        this._promptOptions.SystemAudienceExtraction,
        this.CreateIntentCompletionSettings(),
        functionName: "SystemAudienceExtraction",
        description: "Extract audience");

    var result = await audienceFunction.InvokeAsync(this._kernel, promptArguments, cancellationToken);

    // Token usage is recorded on the ORIGINAL arguments (not the copy) so the caller can read it.
    string? usage = TokenUtils.GetFunctionTokenUsage(result, this._logger);
    if (usage is null)
    {
        this._logger.LogError("Unable to determine token usage for audienceExtraction");
    }
    else
    {
        context[TokenUtils.GetFunctionKey("SystemAudienceExtraction")] = usage;
    }

    return $"List of participants: {result}";
}
463
403
464
404
/// <summary>
@@ -470,16 +410,41 @@ private async Task<string> GetUserIntentAsync(KernelArguments context, Cancellat
470
410
{
    // Budget offered to the intent prompt: the completion limit, minus the tokens
    // reserved for the response, minus the token count of the fixed intent prompt text.
    var completionLimit = this._promptOptions.CompletionTokenLimit;
    var responseReserve = this._promptOptions.ResponseTokenLimit;
    var fixedPromptTokens = TokenUtils.TokenCount(
        string.Join(
            "\n",
            this._promptOptions.SystemPersona,
            this._promptOptions.SystemIntent,
            this._promptOptions.SystemIntentContinuation));
    int tokenBudget = completionLimit - responseReserve - fixedPromptTokens;

    // Work on a copy so the prompt variables set here do not end up in the caller's arguments.
    KernelArguments promptArguments = new(context)
    {
        ["tokenLimit"] = tokenBudget.ToString(NumberFormatInfo.InvariantInfo),
        ["knowledgeCutoff"] = this._promptOptions.KnowledgeCutoffDate,
    };

    var intentFunction = this._kernel.CreateFunctionFromPrompt(
        this._promptOptions.SystemIntentExtraction,
        this.CreateIntentCompletionSettings(),
        functionName: "UserIntentExtraction",
        description: "Extract user intent");

    var result = await intentFunction.InvokeAsync(this._kernel, promptArguments, cancellationToken);

    // Token usage is recorded on the ORIGINAL arguments (not the copy) so the caller can read it.
    string? usage = TokenUtils.GetFunctionTokenUsage(result, this._logger);
    if (usage is null)
    {
        this._logger.LogError("Unable to determine token usage for userIntentExtraction");
    }
    else
    {
        context[TokenUtils.GetFunctionKey("SystemIntentExtraction")] = usage;
    }

    return $"User intent: {result}";
}
484
449
485
450
/// <summary>
@@ -610,24 +575,18 @@ private OpenAIPromptExecutionSettings CreateIntentCompletionSettings()
610
575
}
611
576
612
577
/// <summary>
/// Computes the largest number of tokens that a single request may carry.
/// </summary>
/// <returns>
/// The configured completion token limit, less the tokens reserved for the model's
/// response and a fixed allowance for the system message OpenAI adds on its own.
/// </returns>
private int GetMaxRequestTokenBudget()
{
    // OpenAI inserts a message under the hood:
    //   "content": "Assistant is a large language model.","role": "system"
    // It costs just under 20 tokens, so that amount is set aside here.
    const int HiddenSystemMessageTokens = 20;

    // Total token limit for the completion call.
    var totalTokenLimit = this._promptOptions.CompletionTokenLimit;

    // Tokens kept free so the model has room to generate its response.
    var reservedForResponse = this._promptOptions.ResponseTokenLimit;

    return totalTokenLimit - HiddenSystemMessageTokens - reservedForResponse;
}
632
591
633
592
/// <summary>
0 commit comments