diff --git a/dotnet/SK-dotnet.sln b/dotnet/SK-dotnet.sln index 084201234f3b..3da38e86191f 100644 --- a/dotnet/SK-dotnet.sln +++ b/dotnet/SK-dotnet.sln @@ -461,6 +461,7 @@ EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProcessFramework.Aspire.SummaryAgent", "samples\Demos\ProcessFrameworkWithAspire\ProcessFramework.Aspire\ProcessFramework.Aspire.SummaryAgent\ProcessFramework.Aspire.SummaryAgent.csproj", "{37381352-4F10-427F-AB8A-51FEAB265201}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProcessFramework.Aspire.TranslatorAgent", "samples\Demos\ProcessFrameworkWithAspire\ProcessFramework.Aspire\ProcessFramework.Aspire.TranslatorAgent\ProcessFramework.Aspire.TranslatorAgent.csproj", "{DAD5FC6A-8CA0-43AC-87E1-032DFBD6B02A}" +EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Agents.Bedrock", "src\Agents\Bedrock\Agents.Bedrock.csproj", "{8C658E1E-83C8-4127-B8BF-27A638A45DDD}" EndProject Global diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletion.cs index e42600419a88..2763bb6101b0 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletion.cs @@ -8,7 +8,11 @@ namespace ChatCompletion; -// The following example shows how to use Semantic Kernel with Azure AI Inference / Azure AI Studio +/// +/// These examples demonstrate different ways of using chat completion with Azure Foundry or GitHub models. +/// Azure AI Foundry: https://ai.azure.com/explore/models +/// GitHub Models: https://github.com/marketplace?type=models +/// public class AzureAIInference_ChatCompletion(ITestOutputHelper output) : BaseTest(output) { [Fact] diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletionStreaming.cs index f7dbe9191167..8b164439f9e2 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletionStreaming.cs @@ -9,7 +9,9 @@ namespace ChatCompletion; /// -/// These examples demonstrate the ways different content types are streamed by OpenAI LLM via the chat completion service. +/// These examples demonstrate different ways of using streaming chat completion with Azure Foundry or GitHub models. +/// Azure AI Foundry: https://ai.azure.com/explore/models +/// GitHub Models: https://github.com/marketplace?type=models /// public class AzureAIInference_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) { @@ -120,36 +122,6 @@ private async Task StartStreamingChatAsync(IChatCompletionService chatCompletion await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); } - /// - /// Streams the message output from the chat completion service. - /// - /// The chat completion service instance. - /// The chat history instance. - /// The author role. 
- private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) - { - bool roleWritten = false; - string fullMessage = string.Empty; - - await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (!roleWritten && chatUpdate.Role.HasValue) - { - Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}"); - roleWritten = true; - } - - if (chatUpdate.Content is { Length: > 0 }) - { - fullMessage += chatUpdate.Content; - Console.Write(chatUpdate.Content); - } - } - - Console.WriteLine("\n------------------------"); - chatHistory.AddMessage(authorRole, fullMessage); - } - /// /// Outputs the chat history by streaming the message output from the kernel. /// diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion.cs index e0bc277b9f2d..f3a52b5c5428 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion.cs @@ -8,7 +8,9 @@ namespace ChatCompletion; -// The following example shows how to use Semantic Kernel with Azure OpenAI API +/// +/// These examples demonstrate different ways of using chat completion with Azure OpenAI API. +/// public class AzureOpenAI_ChatCompletion(ITestOutputHelper output) : BaseTest(output) { /// diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionStreaming.cs index 1ef3647623aa..29dfe10d6bd1 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionStreaming.cs @@ -8,7 +8,7 @@ namespace ChatCompletion; /// -/// These examples demonstrate the ways different content types are streamed by Azure OpenAI via the chat completion service. +/// These examples demonstrate different ways of using streaming chat completion with Azure OpenAI API. 
/// public class AzureOpenAI_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) { @@ -128,28 +128,4 @@ private async Task StartStreamingChatAsync(IChatCompletionService chatCompletion // Second assistant message await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); } - - private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) - { - bool roleWritten = false; - string fullMessage = string.Empty; - - await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (!roleWritten && chatUpdate.Role.HasValue) - { - Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}"); - roleWritten = true; - } - - if (chatUpdate.Content is { Length: > 0 }) - { - fullMessage += chatUpdate.Content; - Console.Write(chatUpdate.Content); - } - } - - Console.WriteLine("\n------------------------"); - chatHistory.AddMessage(authorRole, fullMessage); - } } diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion_WithReasoning.cs b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionWithReasoning.cs similarity index 94% rename from dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion_WithReasoning.cs rename to dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionWithReasoning.cs index 6d8bebb4f27e..cc9660c4cfa2 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion_WithReasoning.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionWithReasoning.cs @@ -8,8 +8,10 @@ namespace ChatCompletion; -// The following example shows how to use Semantic Kernel with Azure OpenAI API -public class AzureOpenAI_ChatCompletion_WithReasoning(ITestOutputHelper output) : BaseTest(output) +/// +/// These examples demonstrate different ways of using chat completion reasoning models with Azure OpenAI API. +/// +public class AzureOpenAI_ChatCompletionWithReasoning(ITestOutputHelper output) : BaseTest(output) { /// /// Sample showing how to use with chat completion and chat prompt syntax. diff --git a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_CustomClient.cs b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_CustomClient.cs index eafae661111b..a76a954c1bfa 100644 --- a/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_CustomClient.cs +++ b/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_CustomClient.cs @@ -5,27 +5,34 @@ using Azure.AI.OpenAI; using Microsoft.SemanticKernel; +#pragma warning disable CA5399 // HttpClient is created without enabling CheckCertificateRevocationList + namespace ChatCompletion; +/// +/// This example shows a way of using a Custom HttpClient and HttpHandler with Azure OpenAI Connector to capture +/// the request Uri and Headers for each request. 
+/// public sealed class AzureOpenAI_CustomClient(ITestOutputHelper output) : BaseTest(output) { [Fact] - public async Task RunAsync() + public async Task UsingCustomHttpClientWithAzureOpenAI() { - Console.WriteLine("======== Using a custom AzureOpenAI client ========"); - Assert.NotNull(TestConfiguration.AzureOpenAI.Endpoint); Assert.NotNull(TestConfiguration.AzureOpenAI.ChatDeploymentName); Assert.NotNull(TestConfiguration.AzureOpenAI.ApiKey); + Console.WriteLine($"======== Azure Open AI - {nameof(UsingCustomHttpClientWithAzureOpenAI)} ========"); + // Create an HttpClient and include your custom header(s) - var httpClient = new HttpClient(); - httpClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value"); + using var myCustomHttpHandler = new MyCustomClientHttpHandler(Output); + using var myCustomClient = new HttpClient(handler: myCustomHttpHandler); + myCustomClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value"); // Configure AzureOpenAIClient to use the customized HttpClient var clientOptions = new AzureOpenAIClientOptions { - Transport = new HttpClientPipelineTransport(httpClient), + Transport = new HttpClientPipelineTransport(myCustomClient), NetworkTimeout = TimeSpan.FromSeconds(30), RetryPolicy = new ClientRetryPolicy() }; @@ -48,6 +55,27 @@ public async Task RunAsync() ); Console.WriteLine(result.GetValue()); - httpClient.Dispose(); + myCustomClient.Dispose(); + } + + /// + /// Normally you would use a custom HttpClientHandler to add custom logic to your custom http client + /// This uses the ITestOutputHelper to write the requested URI to the test output + /// + /// The to write the requested URI to the test output + private sealed class MyCustomClientHttpHandler(ITestOutputHelper output) : HttpClientHandler + { + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + output.WriteLine($"Requested URI: {request.RequestUri}"); + + request.Headers.Where(h => h.Key != "Authorization") + .ToList() + .ForEach(h => output.WriteLine($"{h.Key}: {string.Join(", ", h.Value)}")); + output.WriteLine("--------------------------------"); + + // Add custom logic here + return await base.SendAsync(request, cancellationToken); + } } } diff --git a/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletion.cs new file mode 100644 index 000000000000..4cb1c57f60e4 --- /dev/null +++ b/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletion.cs @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.HuggingFace; + +namespace ChatCompletion; + +/// +/// This example shows a way of using Hugging Face connector with HuggingFace Text Generation Inference (TGI) API. +/// Follow steps in to setup HuggingFace local Text Generation Inference HTTP server. +/// +/// Install HuggingFace TGI via docker +/// docker run -d --gpus all --shm-size 1g -p 8080:80 -v "c:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:latest --model-id teknium/OpenHermes-2.5-Mistral-7B +/// Run the examples +/// +/// +public class HuggingFace_ChatCompletion(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// This example shows how to setup HuggingFace to use with the Kernel InvokeAsync (Non-Streaming). 
+ /// + [Fact] +#pragma warning restore CS0419 // Ambiguous reference in cref attribute + public async Task UsingKernelNonStreamingWithHuggingFace() + { + Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingKernelNonStreamingWithHuggingFace)} ========"); + + var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080) + var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model. + + var kernel = Kernel.CreateBuilder() + .AddHuggingFaceChatCompletion( + model: modelId, + apiKey: null, + endpoint: endpoint) + .Build(); + + var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. + Sign the mail as AI Assistant. + + Text: ```{{$input}}```"; + + var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new HuggingFacePromptExecutionSettings + { + TopP = 0.5f, + MaxTokens = 1000, + }); + + var response = await kernel.InvokeAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." }); + Console.WriteLine(response); + } + + /// + /// Sample showing how to use directly with a . + /// + [Fact] + public async Task UsingServiceNonStreamingWithHuggingFace() + { + Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingServiceNonStreamingWithHuggingFace)} ========"); + + // HuggingFace local HTTP server endpoint + var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080) + var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model. + + Kernel kernel = Kernel.CreateBuilder() + .AddHuggingFaceChatCompletion( + model: modelId, + endpoint: endpoint) + .Build(); + + var chatService = kernel.GetRequiredService(); + + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + var reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion"); + OutputLastMessage(chatHistory); + + // Second assistant message + reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); + } +} diff --git a/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletionStreaming.cs new file mode 100644 index 000000000000..d508cb64060d --- /dev/null +++ b/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletionStreaming.cs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.HuggingFace; + +namespace ChatCompletion; + +/// +/// This example shows a way of using Hugging Face connector with HuggingFace Text Generation Inference (TGI) API. +/// Follow steps in to setup HuggingFace local Text Generation Inference HTTP server. 
+/// +/// Install HuggingFace TGI via docker +/// docker run -d --gpus all --shm-size 1g -p 8080:80 -v "c:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:latest --model-id teknium/OpenHermes-2.5-Mistral-7B +/// Run the examples +/// +/// +public class HuggingFace_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// Sample showing how to use directly with a . + /// + [Fact] + public async Task UsingServiceStreamingWithHuggingFace() + { + Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingServiceStreamingWithHuggingFace)} ========"); + + // HuggingFace local HTTP server endpoint + var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080) + var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model. + + Kernel kernel = Kernel.CreateBuilder() + .AddHuggingFaceChatCompletion( + model: modelId, + endpoint: endpoint) + .Build(); + + var chatService = kernel.GetRequiredService(); + + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + OutputLastMessage(chatHistory); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); + OutputLastMessage(chatHistory); + + // Second assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); + } + + /// + /// This example shows how to setup HuggingFace to use with the Kernel InvokeStreamingAsync (Streaming). + /// + [Fact] + public async Task UsingKernelStreamingWithHuggingFace() + { + Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingKernelStreamingWithHuggingFace)} ========"); + + var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080) + var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model. + + var kernel = Kernel.CreateBuilder() + .AddHuggingFaceChatCompletion( + model: modelId, + apiKey: null, + endpoint: endpoint) + .Build(); + + var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. + Sign the mail as AI Assistant. + + Text: ```{{$input}}```"; + + var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new HuggingFacePromptExecutionSettings + { + TopP = 0.5f, + MaxTokens = 1000, + }); + + await foreach (var word in kernel.InvokeStreamingAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." })) + { + Console.WriteLine(word); + } + } +} diff --git a/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletion.cs new file mode 100644 index 000000000000..97562f75c847 --- /dev/null +++ b/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletion.cs @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.OpenAI; + +namespace ChatCompletion; + +/// +/// This example shows a way of using OpenAI connector with other APIs that supports the same ChatCompletion API standard from OpenAI. +/// +/// Install LMStudio Platform in your environment (As of now: 0.3.10) +/// Open LM Studio +/// Search and Download Llama2 model or any other +/// Update the modelId parameter with the model llm name loaded (i.e: llama-2-7b-chat) +/// Start the Local Server on http://localhost:1234 +/// Run the examples +/// +/// +public class LMStudio_ChatCompletion(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// This example shows how to setup LMStudio to use with the InvokeAsync (Non-Streaming). + /// + [Fact] +#pragma warning restore CS0419 // Ambiguous reference in cref attribute + public async Task UsingKernelNonStreamingWithLMStudio() + { + Console.WriteLine($"======== LM Studio - Chat Completion - {nameof(UsingKernelNonStreamingWithLMStudio)} ========"); + + var modelId = "llama-2-7b-chat"; // Update the modelId if you chose a different model. + var endpoint = new Uri("http://localhost:1234/v1"); // Update the endpoint if you chose a different port. + + var kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: modelId, + apiKey: null, + endpoint: endpoint) + .Build(); + + var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. + Sign the mail as AI Assistant. + + Text: ```{{$input}}```"; + + var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new OpenAIPromptExecutionSettings + { + TopP = 0.5, + MaxTokens = 1000, + }); + + var response = await kernel.InvokeAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." }); + Console.WriteLine(response); + } + + /// + /// Sample showing how to use directly with a . + /// + [Fact] + public async Task UsingServiceNonStreamingWithLMStudio() + { + Console.WriteLine($"======== LM Studio - Chat Completion - {nameof(UsingServiceNonStreamingWithLMStudio)} ========"); + + var modelId = "llama-2-7b-chat"; // Update the modelId if you chose a different model. + var endpoint = new Uri("http://localhost:1234/v1"); // Update the endpoint if you chose a different port. 
+ + OpenAIChatCompletionService chatService = new(modelId: modelId, apiKey: null, endpoint: endpoint); + + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + var reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion"); + OutputLastMessage(chatHistory); + + // Second assistant message + reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); + } +} diff --git a/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletionStreaming.cs new file mode 100644 index 000000000000..8ac827d41120 --- /dev/null +++ b/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletionStreaming.cs @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.OpenAI; + +namespace ChatCompletion; + +/// +/// This example shows a way of using OpenAI connector with other APIs that supports the same ChatCompletion API standard from OpenAI. +/// +/// Install LMStudio Platform in your environment (As of now: 0.3.10) +/// Open LM Studio +/// Search and Download Llama2 model or any other +/// Update the modelId parameter with the model llm name loaded (i.e: llama-2-7b-chat) +/// Start the Local Server on http://localhost:1234 +/// Run the examples +/// +/// +public class LMStudio_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// Sample showing how to use streaming directly with a . + /// + [Fact] + public async Task UsingServiceStreamingWithLMStudio() + { + Console.WriteLine($"======== LM Studio - Chat Completion - {nameof(UsingServiceStreamingWithLMStudio)} ========"); + + var modelId = "llama-2-7b-chat"; // Update the modelId if you chose a different model. + var endpoint = new Uri("http://localhost:1234/v1"); // Update the endpoint if you chose a different port. 
+ + var kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: modelId, + apiKey: null, + endpoint: endpoint) + .Build(); + + OpenAIChatCompletionService chatCompletionService = new(modelId: modelId, apiKey: null, endpoint: endpoint); + + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + OutputLastMessage(chatHistory); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); + OutputLastMessage(chatHistory); + + // Second assistant message + await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); + } + + /// + /// This example shows how to setup LMStudio to use with the Kernel InvokeAsync (Streaming). + /// + [Fact] + public async Task UsingKernelStreamingWithLMStudio() + { + Console.WriteLine($"======== LM Studio - Chat Completion - {nameof(UsingKernelStreamingWithLMStudio)} ========"); + + var modelId = "llama-2-7b-chat"; // Update the modelId if you chose a different model. + var endpoint = new Uri("http://localhost:1234/v1"); // Update the endpoint if you chose a different port. + + var kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: modelId, + apiKey: null, + endpoint: endpoint) + .Build(); + + var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. + Sign the mail as AI Assistant. + + Text: ```{{$input}}```"; + + var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new OpenAIPromptExecutionSettings + { + TopP = 0.5, + MaxTokens = 1000, + }); + + await foreach (var word in kernel.InvokeStreamingAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." })) + { + Console.WriteLine(word); + } + } +} diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs index 79b72003ee89..307edbe4b229 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs @@ -8,7 +8,9 @@ namespace ChatCompletion; -// The following example shows how to use Semantic Kernel with Ollama Chat Completion API +/// +/// These examples demonstrate different ways of using chat completion with Ollama API. +/// public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output) { /// @@ -85,7 +87,7 @@ public async Task ServicePromptWithInnerContentAsync() // Assistant message details // Ollama Sharp does not support non-streaming and always perform streaming calls, for this reason, the inner content is always a list of chunks. - var replyInnerContent = reply.InnerContent as List; + var replyInnerContent = reply.InnerContent as ChatDoneResponseStream; OutputInnerContent(replyInnerContent!); } @@ -146,42 +148,35 @@ public async Task ChatPromptWithInnerContentAsync() // Ollama Sharp does not support non-streaming and always perform streaming calls, for this reason, the inner content of a non-streaming result is a list of the generated chunks. 
var messageContent = functionResult.GetValue(); // Retrieves underlying chat message content from FunctionResult. - var replyInnerContent = messageContent!.InnerContent as List; // Retrieves inner content from ChatMessageContent. + var replyInnerContent = messageContent!.InnerContent as ChatDoneResponseStream; // Retrieves inner content from ChatMessageContent. OutputInnerContent(replyInnerContent!); } /// - /// Retrieve extra information from each streaming chunk response in a list of chunks. + /// Retrieve extra information from the final response. /// - /// List of streaming chunks provided as inner content of a chat message + /// The complete OllamaSharp response provided as inner content of a chat message /// /// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes /// may cause breaking changes in the code below. /// - private void OutputInnerContent(List innerContent) + private void OutputInnerContent(ChatDoneResponseStream innerContent) { - Console.WriteLine($"Model: {innerContent![0].Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only - Console.WriteLine(" -- Chunk changing data -- "); - - innerContent.ForEach(streamChunk => - { - Console.WriteLine($"Message role: {streamChunk.Message.Role}"); - Console.WriteLine($"Message content: {streamChunk.Message.Content}"); - Console.WriteLine($"Created at: {streamChunk.CreatedAt}"); - Console.WriteLine($"Done: {streamChunk.Done}"); - /// The last message in the chunk is a type with additional metadata. - if (streamChunk is ChatDoneResponseStream doneStreamChunk) - { - Console.WriteLine($"Done Reason: {doneStreamChunk.DoneReason}"); - Console.WriteLine($"Eval count: {doneStreamChunk.EvalCount}"); - Console.WriteLine($"Eval duration: {doneStreamChunk.EvalDuration}"); - Console.WriteLine($"Load duration: {doneStreamChunk.LoadDuration}"); - Console.WriteLine($"Total duration: {doneStreamChunk.TotalDuration}"); - Console.WriteLine($"Prompt eval count: {doneStreamChunk.PromptEvalCount}"); - Console.WriteLine($"Prompt eval duration: {doneStreamChunk.PromptEvalDuration}"); - } - Console.WriteLine("------------------------"); - }); + Console.WriteLine($$""" + Model: {{innerContent.Model}} + Message role: {{innerContent.Message.Role}} + Message content: {{innerContent.Message.Content}} + Created at: {{innerContent.CreatedAt}} + Done: {{innerContent.Done}} + Done Reason: {{innerContent.DoneReason}} + Eval count: {{innerContent.EvalCount}} + Eval duration: {{innerContent.EvalDuration}} + Load duration: {{innerContent.LoadDuration}} + Total duration: {{innerContent.TotalDuration}} + Prompt eval count: {{innerContent.PromptEvalCount}} + Prompt eval duration: {{innerContent.PromptEvalDuration}} + ------------------------ + """); } } diff --git a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs index 9d6e8cf9e845..1713d9a03052 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs @@ -9,7 +9,7 @@ namespace ChatCompletion; /// -/// These examples demonstrate the ways different content types are streamed by Ollama via the chat completion service. +/// These examples demonstrate different ways of using chat completion with Ollama API. 
/// public class Ollama_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) { @@ -17,11 +17,11 @@ public class Ollama_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest /// This example demonstrates chat completion streaming using Ollama. /// [Fact] - public Task StreamChatAsync() + public async Task UsingServiceStreamingWithOllama() { Assert.NotNull(TestConfiguration.Ollama.ModelId); - Console.WriteLine("======== Ollama - Chat Completion Streaming ========"); + Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingServiceStreamingWithOllama)} ========"); using var ollamaClient = new OllamaApiClient( uriString: TestConfiguration.Ollama.Endpoint, @@ -29,22 +29,39 @@ public Task StreamChatAsync() var chatService = ollamaClient.AsChatCompletionService(); - return this.StartStreamingChatAsync(chatService); + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + this.OutputLastMessage(chatHistory); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + this.OutputLastMessage(chatHistory); + + // First assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); + this.OutputLastMessage(chatHistory); + + // Second assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); } /// - /// This example demonstrates retrieving extra information chat completion streaming using Ollama. + /// This example demonstrates retrieving underlying library information through chat completion streaming inner contents. /// /// - /// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes - /// may cause breaking changes in the code below. + /// This is a breaking glass scenario and is more susceptible to break on newer versions of OllamaSharp library. /// [Fact] - public async Task StreamChatWithInnerContentAsync() + public async Task UsingServiceStreamingInnerContentsWithOllama() { Assert.NotNull(TestConfiguration.Ollama.ModelId); - Console.WriteLine("======== Ollama - Chat Completion Streaming ========"); + Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingServiceStreamingInnerContentsWithOllama)} ========"); using var ollamaClient = new OllamaApiClient( uriString: TestConfiguration.Ollama.Endpoint, @@ -70,13 +87,15 @@ public async Task StreamChatWithInnerContentAsync() } /// - /// Demonstrates how you can template a chat history call while using the kernel for invocation. + /// Demonstrates how you can template a chat history call while using the for invocation. /// [Fact] - public async Task StreamChatPromptAsync() + public async Task UsingKernelChatPromptStreamingWithOllama() { Assert.NotNull(TestConfiguration.Ollama.ModelId); + Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreamingWithOllama)} ========"); + StringBuilder chatPrompt = new(""" You are a librarian, expert about books Hi, I'm looking for book suggestions @@ -99,17 +118,18 @@ public async Task StreamChatPromptAsync() } /// - /// Demonstrates how you can template a chat history call and get extra information from the response while using the kernel for invocation. 
+ /// This example demonstrates retrieving underlying library information through chat completion streaming inner contents. /// /// - /// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes - /// may cause breaking changes in the code below. + /// This is a breaking glass scenario and is more susceptible to break on newer versions of OllamaSharp library. /// [Fact] - public async Task StreamChatPromptWithInnerContentAsync() + public async Task UsingKernelChatPromptStreamingInnerContentsWithOllama() { Assert.NotNull(TestConfiguration.Ollama.ModelId); + Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreamingInnerContentsWithOllama)} ========"); + StringBuilder chatPrompt = new(""" You are a librarian, expert about books Hi, I'm looking for book suggestions @@ -139,11 +159,11 @@ public async Task StreamChatPromptWithInnerContentAsync() /// and alternatively via the StreamingChatMessageContent.Items property. /// [Fact] - public async Task StreamTextFromChatAsync() + public async Task UsingStreamingTextFromChatCompletionWithOllama() { Assert.NotNull(TestConfiguration.Ollama.ModelId); - Console.WriteLine("======== Stream Text from Chat Content ========"); + Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingStreamingTextFromChatCompletionWithOllama)} ========"); using var ollamaClient = new OllamaApiClient( uriString: TestConfiguration.Ollama.Endpoint, @@ -168,53 +188,6 @@ public async Task StreamTextFromChatAsync() } } - private async Task StartStreamingChatAsync(IChatCompletionService chatCompletionService) - { - Console.WriteLine("Chat content:"); - Console.WriteLine("------------------------"); - - var chatHistory = new ChatHistory("You are a librarian, expert about books"); - this.OutputLastMessage(chatHistory); - - // First user message - chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); - this.OutputLastMessage(chatHistory); - - // First assistant message - await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); - - // Second user message - chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); - this.OutputLastMessage(chatHistory); - - // Second assistant message - await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); - } - - private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) - { - bool roleWritten = false; - string fullMessage = string.Empty; - - await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (!roleWritten && chatUpdate.Role.HasValue) - { - Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}"); - roleWritten = true; - } - - if (chatUpdate.Content is { Length: > 0 }) - { - fullMessage += chatUpdate.Content; - Console.Write(chatUpdate.Content); - } - } - - Console.WriteLine("\n------------------------"); - chatHistory.AddMessage(authorRole, fullMessage); - } - private async Task StreamMessageOutputFromKernelAsync(Kernel kernel, string prompt) { bool roleWritten = false; @@ -249,22 +222,26 @@ private async Task StreamMessageOutputFromKernelAsync(Kernel kernel, str /// private void OutputInnerContent(ChatResponseStream streamChunk) { - Console.WriteLine($"Model: {streamChunk.Model}"); - Console.WriteLine($"Message role: 
{streamChunk.Message.Role}"); - Console.WriteLine($"Message content: {streamChunk.Message.Content}"); - Console.WriteLine($"Created at: {streamChunk.CreatedAt}"); - Console.WriteLine($"Done: {streamChunk.Done}"); + Console.WriteLine($$""" + Model: {{streamChunk.Model}} + Message role: {{streamChunk.Message.Role}} + Message content: {{streamChunk.Message.Content}} + Created at: {{streamChunk.CreatedAt}} + Done: {{streamChunk.Done}} + """); /// The last message in the chunk is a type with additional metadata. if (streamChunk is ChatDoneResponseStream doneStream) { - Console.WriteLine($"Done Reason: {doneStream.DoneReason}"); - Console.WriteLine($"Eval count: {doneStream.EvalCount}"); - Console.WriteLine($"Eval duration: {doneStream.EvalDuration}"); - Console.WriteLine($"Load duration: {doneStream.LoadDuration}"); - Console.WriteLine($"Total duration: {doneStream.TotalDuration}"); - Console.WriteLine($"Prompt eval count: {doneStream.PromptEvalCount}"); - Console.WriteLine($"Prompt eval duration: {doneStream.PromptEvalDuration}"); + Console.WriteLine($$""" + Done Reason: {{doneStream.DoneReason}} + Eval count: {{doneStream.EvalCount}} + Eval duration: {{doneStream.EvalDuration}} + Load duration: {{doneStream.LoadDuration}} + Total duration: {{doneStream.TotalDuration}} + Prompt eval count: {{doneStream.PromptEvalCount}} + Prompt eval duration: {{doneStream.PromptEvalDuration}} + """); } Console.WriteLine("------------------------"); } diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs index cf4a571d38c4..47c047d5271c 100644 --- a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs @@ -7,7 +7,9 @@ namespace ChatCompletion; -// The following example shows how to use Semantic Kernel with OpenAI API +/// +/// These examples demonstrate different ways of using chat completion with OpenAI API. 
+/// public class OpenAI_ChatCompletion(ITestOutputHelper output) : BaseTest(output) { /// @@ -21,9 +23,30 @@ public async Task ServicePromptAsync() Console.WriteLine("======== Open AI - Chat Completion ========"); - OpenAIChatCompletionService chatCompletionService = new(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey); + OpenAIChatCompletionService chatService = new(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey); + + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + var reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); - await StartChatAsync(chatCompletionService); + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion"); + OutputLastMessage(chatHistory); + + // Second assistant message + reply = await chatService.GetChatMessageContentAsync(chatHistory); + chatHistory.Add(reply); + OutputLastMessage(chatHistory); } /// @@ -147,32 +170,6 @@ public async Task ChatPromptStoreWithMetadataAsync() OutputInnerContent(replyInnerContent!); } - private async Task StartChatAsync(IChatCompletionService chatGPT) - { - Console.WriteLine("Chat content:"); - Console.WriteLine("------------------------"); - - var chatHistory = new ChatHistory("You are a librarian, expert about books"); - - // First user message - chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); - OutputLastMessage(chatHistory); - - // First assistant message - var reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.Add(reply); - OutputLastMessage(chatHistory); - - // Second user message - chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion"); - OutputLastMessage(chatHistory); - - // Second assistant message - reply = await chatGPT.GetChatMessageContentAsync(chatHistory); - chatHistory.Add(reply); - OutputLastMessage(chatHistory); - } - /// /// Retrieve extra information from a inner content of type . 
/// @@ -183,19 +180,21 @@ private async Task StartChatAsync(IChatCompletionService chatGPT) /// private void OutputInnerContent(OpenAI.Chat.ChatCompletion innerContent) { - Console.WriteLine($"Message role: {innerContent.Role}"); // Available as a property of ChatMessageContent - Console.WriteLine($"Message content: {innerContent.Content[0].Text}"); // Available as a property of ChatMessageContent - - Console.WriteLine($"Model: {innerContent.Model}"); // Model doesn't change per chunk, so we can get it from the first chunk only - Console.WriteLine($"Created At: {innerContent.CreatedAt}"); - - Console.WriteLine($"Finish reason: {innerContent.FinishReason}"); - Console.WriteLine($"Input tokens usage: {innerContent.Usage.InputTokenCount}"); - Console.WriteLine($"Output tokens usage: {innerContent.Usage.OutputTokenCount}"); - Console.WriteLine($"Total tokens usage: {innerContent.Usage.TotalTokenCount}"); - Console.WriteLine($"Refusal: {innerContent.Refusal} "); - Console.WriteLine($"Id: {innerContent.Id}"); - Console.WriteLine($"System fingerprint: {innerContent.SystemFingerprint}"); + Console.WriteLine($$""" + Message role: {{innerContent.Role}} // Available as a property of ChatMessageContent + Message content: {{innerContent.Content[0].Text}} // Available as a property of ChatMessageContent + + Model: {{innerContent.Model}} // Model doesn't change per chunk, so we can get it from the first chunk only + Created At: {{innerContent.CreatedAt}} + + Finish reason: {{innerContent.FinishReason}} + Input tokens usage: {{innerContent.Usage.InputTokenCount}} + Output tokens usage: {{innerContent.Usage.OutputTokenCount}} + Total tokens usage: {{innerContent.Usage.TotalTokenCount}} + Refusal: {{innerContent.Refusal}} + Id: {{innerContent.Id}} + System fingerprint: {{innerContent.SystemFingerprint}} + """); if (innerContent.ContentTokenLogProbabilities.Count > 0) { diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs index 0e9fe0326290..7773fadbb76f 100644 --- a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs @@ -8,7 +8,7 @@ namespace ChatCompletion; /// -/// These examples demonstrate the ways different content types are streamed by OpenAI LLM via the chat completion service. +/// These examples demonstrate different ways of using streaming chat completion with OpenAI API. /// public class OpenAI_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output) { @@ -214,37 +214,6 @@ public async Task StreamFunctionCallContentAsync() } } - private async Task StreamMessageOutputAsync(OpenAIChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) - { - bool roleWritten = false; - string fullMessage = string.Empty; - - await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (!roleWritten && chatUpdate.Role.HasValue) - { - Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}"); - roleWritten = true; - } - - if (chatUpdate.Content is { Length: > 0 }) - { - fullMessage += chatUpdate.Content; - Console.Write(chatUpdate.Content); - } - - // The last message in the chunk has the usage metadata. 
- // https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options - if (chatUpdate.Metadata?["Usage"] is not null) - { - Console.WriteLine(chatUpdate.Metadata["Usage"]?.AsJson()); - } - } - - Console.WriteLine("\n------------------------"); - chatHistory.AddMessage(authorRole, fullMessage); - } - private async Task StreamMessageOutputFromKernelAsync(Kernel kernel, string prompt) { bool roleWritten = false; diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion_WithReasoning.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithReasoning.cs similarity index 97% rename from dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion_WithReasoning.cs rename to dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithReasoning.cs index 547df991744c..b28b45363204 100644 --- a/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion_WithReasoning.cs +++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithReasoning.cs @@ -9,7 +9,7 @@ namespace ChatCompletion; // The following example shows how to use Semantic Kernel with OpenAI API -public class OpenAI_ChatCompletion_WithReasoning(ITestOutputHelper output) : BaseTest(output) +public class OpenAI_ChatCompletionWithReasoning(ITestOutputHelper output) : BaseTest(output) { /// /// Sample showing how to use with chat completion and chat prompt syntax. diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs index c36b1d945c67..fa014ede905f 100644 --- a/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs +++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs @@ -5,26 +5,33 @@ using Microsoft.SemanticKernel; using OpenAI; +#pragma warning disable CA5399 // HttpClient is created without enabling CheckCertificateRevocationList + namespace ChatCompletion; +/// +/// This example shows a way of using a Custom HttpClient and HttpHandler with OpenAI Connector to capture +/// the request Uri and Headers for each request. 
+/// public sealed class OpenAI_CustomClient(ITestOutputHelper output) : BaseTest(output) { [Fact] - public async Task RunAsync() + public async Task UsingCustomHttpClientWithOpenAI() { Assert.NotNull(TestConfiguration.OpenAI.ChatModelId); Assert.NotNull(TestConfiguration.OpenAI.ApiKey); - Console.WriteLine("======== Using a custom OpenAI client ========"); + Console.WriteLine($"======== Open AI - {nameof(UsingCustomHttpClientWithOpenAI)} ========"); // Create an HttpClient and include your custom header(s) - using var httpClient = new HttpClient(); - httpClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value"); + using var myCustomHttpHandler = new MyCustomClientHttpHandler(Output); + using var myCustomClient = new HttpClient(handler: myCustomHttpHandler); + myCustomClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value"); // Configure AzureOpenAIClient to use the customized HttpClient var clientOptions = new OpenAIClientOptions { - Transport = new HttpClientPipelineTransport(httpClient), + Transport = new HttpClientPipelineTransport(myCustomClient), NetworkTimeout = TimeSpan.FromSeconds(30), RetryPolicy = new ClientRetryPolicy() }; @@ -45,8 +52,30 @@ public async Task RunAsync() kernel.Plugins["FunPlugin"]["Excuses"], new() { ["input"] = "I have no homework" } ); + Console.WriteLine(result.GetValue()); - httpClient.Dispose(); + myCustomClient.Dispose(); + } + + /// + /// Normally you would use a custom HttpClientHandler to add custom logic to your custom http client + /// This uses the ITestOutputHelper to write the requested URI to the test output + /// + /// The to write the requested URI to the test output + private sealed class MyCustomClientHttpHandler(ITestOutputHelper output) : HttpClientHandler + { + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + output.WriteLine($"Requested URI: {request.RequestUri}"); + + request.Headers.Where(h => h.Key != "Authorization") + .ToList() + .ForEach(h => output.WriteLine($"{h.Key}: {string.Join(", ", h.Value)}")); + output.WriteLine("--------------------------------"); + + // Add custom logic here + return await base.SendAsync(request, cancellationToken); + } } } diff --git a/dotnet/samples/Concepts/Concepts.csproj b/dotnet/samples/Concepts/Concepts.csproj index b5cfce829772..728dce6b41fb 100644 --- a/dotnet/samples/Concepts/Concepts.csproj +++ b/dotnet/samples/Concepts/Concepts.csproj @@ -8,7 +8,7 @@ false true - $(NoWarn);CS8618,IDE0009,CA1051,CA1050,CA1707,CA1054,CA2007,VSTHRD111,CS1591,RCS1110,RCS1243,CA5394,SKEXP0001,SKEXP0010,SKEXP0020,SKEXP0040,SKEXP0050,SKEXP0060,SKEXP0070,SKEXP0101,SKEXP0110,OPENAI001,CA1724 + $(NoWarn);CS8618,IDE0009,IDE1006,CA1051,CA1050,CA1707,CA1054,CA2007,VSTHRD111,CS1591,RCS1110,RCS1243,CA5394,SKEXP0001,SKEXP0010,SKEXP0020,SKEXP0040,SKEXP0050,SKEXP0060,SKEXP0070,SKEXP0101,SKEXP0110,OPENAI001,CA1724 Library 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0 diff --git a/dotnet/samples/Concepts/LocalModels/HuggingFace_ChatCompletionWithTGI.cs b/dotnet/samples/Concepts/LocalModels/HuggingFace_ChatCompletionWithTGI.cs deleted file mode 100644 index c1b3372d071e..000000000000 --- a/dotnet/samples/Concepts/LocalModels/HuggingFace_ChatCompletionWithTGI.cs +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
- -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.ChatCompletion; - -#pragma warning disable format // Format item can be simplified -#pragma warning disable CA1861 // Avoid constant arrays as arguments - -namespace LocalModels; - -// The following example shows how to use Semantic Kernel with HuggingFace API. -public class HuggingFace_ChatCompletionWithTGI(ITestOutputHelper output) : BaseTest(output) -{ - /// - /// Follow steps in to setup HuggingFace local Text Generation Inference HTTP server. - /// - [Fact(Skip = "Requires TGI (text generation inference) deployment")] - public async Task RunTGI_ChatCompletionAsync() - { - Console.WriteLine("\n======== HuggingFace - TGI Chat Completion ========\n"); - - // This example was run against one of the chat completion (Message API) supported models from HuggingFace, listed in here: - // Starting a Local Docker i.e: - // docker run --gpus all --shm-size 1g -p 8080:80 -v "F:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:1.4 --model-id teknium/OpenHermes-2.5-Mistral-7B - - // HuggingFace local HTTP server endpoint - var endpoint = new Uri("http://localhost:8080"); - - const string Model = "teknium/OpenHermes-2.5-Mistral-7B"; - - Kernel kernel = Kernel.CreateBuilder() - .AddHuggingFaceChatCompletion( - model: Model, - endpoint: endpoint) - .Build(); - - var chatCompletion = kernel.GetRequiredService(); - var chatHistory = new ChatHistory("You are a helpful assistant.") - { - new ChatMessageContent(AuthorRole.User, "What is deep learning?") - }; - - var result = await chatCompletion.GetChatMessageContentAsync(chatHistory); - - Console.WriteLine(result.Role); - Console.WriteLine(result.Content); - } - - /// - /// Follow steps in to setup HuggingFace local Text Generation Inference HTTP server. - /// - [Fact(Skip = "Requires TGI (text generation inference) deployment")] - public async Task RunTGI_StreamingChatCompletionAsync() - { - Console.WriteLine("\n======== HuggingFace - TGI Chat Completion Streaming ========\n"); - - // This example was run against one of the chat completion (Message API) supported models from HuggingFace, listed in here: - // Starting a Local Docker i.e: - // docker run --gpus all --shm-size 1g -p 8080:80 -v "F:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:1.4 --model-id teknium/OpenHermes-2.5-Mistral-7B - - // HuggingFace local HTTP server endpoint - var endpoint = new Uri("http://localhost:8080"); - - const string Model = "teknium/OpenHermes-2.5-Mistral-7B"; - - Kernel kernel = Kernel.CreateBuilder() - .AddHuggingFaceChatCompletion( - model: Model, - endpoint: endpoint) - .Build(); - - var chatCompletion = kernel.GetRequiredService(); - var chatHistory = new ChatHistory("You are a helpful assistant.") - { - new ChatMessageContent(AuthorRole.User, "What is deep learning?") - }; - - AuthorRole? role = null; - await foreach (var chatMessageChunk in chatCompletion.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (role is null) - { - role = chatMessageChunk.Role; - Console.Write(role); - } - Console.Write(chatMessageChunk.Content); - } - } -} diff --git a/dotnet/samples/Concepts/LocalModels/MultipleProviders_ChatCompletion.cs b/dotnet/samples/Concepts/LocalModels/MultipleProviders_ChatCompletion.cs deleted file mode 100644 index ec118d27e977..000000000000 --- a/dotnet/samples/Concepts/LocalModels/MultipleProviders_ChatCompletion.cs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
- -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.Connectors.OpenAI; - -namespace LocalModels; - -/// -/// This example shows a way of using OpenAI connector with other APIs that supports the same ChatCompletion Message API standard from OpenAI. -/// -/// To proceed with this example will be necessary to follow those steps: -/// 1. Install LMStudio Platform in your environment -/// 2. Open LM Studio -/// 3. Search and Download both Phi2 and Llama2 models (preferably the ones that uses 8GB RAM or more) -/// 4. Start the Message API Server on http://localhost:1234 -/// 5. Run the examples. -/// -/// OR -/// -/// 1. Start the Ollama Message API Server on http://localhost:11434 using docker -/// 2. docker run -d --gpus=all -v "d:\temp\ollama:/root/.ollama" -p 11434:11434 --name ollama ollama/ollama -/// 3. Set Llama2 as the current ollama model: docker exec -it ollama ollama run llama2 -/// 4. Run the Ollama examples. -/// -/// OR -/// -/// 1. Start the LocalAI Message API Server on http://localhost:8080 -/// 2. docker run -ti -p 8080:8080 localai/localai:v2.12.3-ffmpeg-core phi-2 -/// 3. Run the LocalAI examples. -/// -public class MultipleProviders_ChatCompletion(ITestOutputHelper output) : BaseTest(output) -{ - [Theory(Skip = "Manual configuration needed")] - [InlineData("LMStudio", "http://localhost:1234", "llama2")] // Setup Llama2 as the model in LM Studio UI and start the Message API Server on http://localhost:1234 - [InlineData("Ollama", "http://localhost:11434", "llama2")] // Start the Ollama Message API Server on http://localhost:11434 using docker - [InlineData("LocalAI", "http://localhost:8080", "phi-2")] - public async Task LocalModel_ExampleAsync(string messageAPIPlatform, string url, string modelId) - { - Console.WriteLine($"Example using local {messageAPIPlatform}"); - // Setup Llama2 as the model in LM Studio UI. - - var kernel = Kernel.CreateBuilder() - .AddOpenAIChatCompletion( - modelId: modelId, - apiKey: null, - endpoint: new Uri(url)) - .Build(); - - var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. - Sign the mail as AI Assistant. - - Text: ```{{$input}}```"; - - var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new OpenAIPromptExecutionSettings - { - TopP = 0.5, - MaxTokens = 1000, - }); - - var response = await kernel.InvokeAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." }); - Console.WriteLine(response); - } - - [Theory(Skip = "Manual configuration needed")] - [InlineData("LMStudio", "http://localhost:1234", "llama2")] // Setup Llama2 as the model in LM Studio UI and start the Message API Server on http://localhost:1234 - [InlineData("Ollama", "http://localhost:11434", "llama2")] // Start the Ollama Message API Server on http://localhost:11434 using docker - [InlineData("LocalAI", "http://localhost:8080", "phi-2")] - public async Task LocalModel_StreamingExampleAsync(string messageAPIPlatform, string url, string modelId) - { - Console.WriteLine($"Example using local {messageAPIPlatform}"); - - var kernel = Kernel.CreateBuilder() - .AddOpenAIChatCompletion( - modelId: modelId, - apiKey: null, - endpoint: new Uri(url)) - .Build(); - - var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise. - Sign the mail as AI Assistant. 
-
-    Text: ```{{$input}}```";
-
-        var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new OpenAIPromptExecutionSettings
-        {
-            TopP = 0.5,
-            MaxTokens = 1000,
-        });
-
-        await foreach (var word in kernel.InvokeStreamingAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." }))
-        {
-            Console.WriteLine(word);
-        }
-    }
-}
diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md
index e2063fc09200..2c213d423790 100644
--- a/dotnet/samples/Concepts/README.md
+++ b/dotnet/samples/Concepts/README.md
@@ -55,6 +55,7 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
 - [AzureAIInference_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletion.cs)
 - [AzureAIInference_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureAIInference_ChatCompletionStreaming.cs)
 - [AzureOpenAI_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletion.cs)
+- [AzureOpenAI_ChatCompletionWithReasoning](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionWithReasoning.cs)
 - [AzureOpenAI_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_ChatCompletionStreaming.cs)
 - [AzureOpenAI_CustomClient](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureOpenAI_CustomClient.cs)
 - [AzureOpenAIWithData_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/AzureOpenAIWithData_ChatCompletion.cs)
@@ -68,23 +69,31 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
 - [Google_GeminiChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiChatCompletionStreaming.cs)
 - [Google_GeminiGetModelResult](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiGetModelResult.cs)
 - [Google_GeminiVision](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Google_GeminiVision.cs)
+- [HuggingFace_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletion.cs)
+- [HuggingFace_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/HuggingFace_ChatCompletionStreaming.cs)
 - [HybridCompletion_Fallback](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/HybridCompletion_Fallback.cs)
-- [OpenAI_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs)
-- [OpenAI_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs)
-- [OpenAI_ChatCompletionWithVision](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs)
-- [OpenAI_CustomClient](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs)
-- [OpenAI_UsingLogitBias](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_UsingLogitBias.cs)
-- [OpenAI_FunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_FunctionCalling.cs)
-- [OpenAI_ReasonedFunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ReasonedFunctionCalling.cs)
-- [MultipleProviders_ChatHistoryReducer](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MuiltipleProviders_ChatHistoryReducer.cs)
+- [LMStudio_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletion.cs)
+- [LMStudio_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/LMStudio_ChatCompletionStreaming.cs)
 - [MistralAI_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MistralAI_ChatCompletion.cs)
 - [MistralAI_ChatPrompt](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MistralAI_ChatPrompt.cs)
 - [MistralAI_FunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MistralAI_FunctionCalling.cs)
 - [MistralAI_StreamingFunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MistralAI_StreamingFunctionCalling.cs)
-- [Onnx_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs)
-- [Onnx_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs)
+- [MultipleProviders_ChatHistoryReducer](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/MuiltipleProviders_ChatHistoryReducer.cs)
 - [Ollama_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs)
 - [Ollama_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs)
+- [Onnx_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs)
+- [Onnx_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs)
+- [OpenAI_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletion.cs)
+- [OpenAI_ChatCompletionStreaming](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionStreaming.cs)
+- [OpenAI_ChatCompletionWithReasoning](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithReasoning.cs)
+- [OpenAI_ChatCompletionWithVision](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ChatCompletionWithVision.cs)
+- [OpenAI_CustomClient](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_CustomClient.cs)
+- [OpenAI_FunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_FunctionCalling.cs)
+- [OpenAI_FunctionCallingWithMemoryPlugin](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_FunctionCallingWithMemoryPlugin.cs)
+- [OpenAI_ReasonedFunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_ReasonedFunctionCalling.cs)
+- [OpenAI_RepeatedFunctionCalling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_RepeatedFunctionCalling.cs)
+- [OpenAI_StructuredOutputs](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_StructuredOutputs.cs)
+- [OpenAI_UsingLogitBias](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ChatCompletion/OpenAI_UsingLogitBias.cs)
 
 ### DependencyInjection - Examples on using `DI Container`
 
@@ -121,11 +130,6 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom
 
 - [HuggingFace_ImageToText](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/ImageToText/HuggingFace_ImageToText.cs)
 
-### LocalModels - Running models locally
-
-- [HuggingFace_ChatCompletionWithTGI](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/LocalModels/HuggingFace_ChatCompletionWithTGI.cs)
-- [MultipleProviders_ChatCompletion](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/LocalModels/MultipleProviders_ChatCompletion.cs)
-
 ### Memory - Using AI [`Memory`](https://github.com/microsoft/semantic-kernel/tree/main/dotnet/src/SemanticKernel.Abstractions/Memory) concepts
 
 - [OpenAI_EmbeddingGeneration](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/OpenAI_EmbeddingGeneration.cs)
diff --git a/dotnet/src/InternalUtilities/samples/InternalUtilities/BaseTest.cs b/dotnet/src/InternalUtilities/samples/InternalUtilities/BaseTest.cs
index 03c09e63551b..78816c97e2e2 100644
--- a/dotnet/src/InternalUtilities/samples/InternalUtilities/BaseTest.cs
+++ b/dotnet/src/InternalUtilities/samples/InternalUtilities/BaseTest.cs
@@ -141,6 +141,33 @@ protected void OutputLastMessage(ChatHistory chatHistory)
         Console.WriteLine("------------------------");
     }
 
+    /// <summary>
+    /// Outputs the stream of generated message tokens.
+    /// </summary>
+    protected async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole)
+    {
+        bool roleWritten = false;
+        string fullMessage = string.Empty;
+
+        await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory))
+        {
+            if (!roleWritten && chatUpdate.Role.HasValue)
+            {
+                Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}");
+                roleWritten = true;
+            }
+
+            if (chatUpdate.Content is { Length: > 0 })
+            {
+                fullMessage += chatUpdate.Content;
+                Console.Write(chatUpdate.Content);
+            }
+        }
+
+        Console.WriteLine("\n------------------------");
+        chatHistory.AddMessage(authorRole, fullMessage);
+    }
+
     ///
     /// Utility method to write a horizontal rule to the console.
     ///
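Note: the hunk above moves the streaming output helper into the shared BaseTest class so individual samples no longer need private copies. For illustration, a minimal sketch of how a sample derived from BaseTest could call it; the class name, model id, and TestConfiguration-based key below are assumptions, not part of this change.

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

public class Example_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
{
    [Fact]
    public async Task StreamChatAsync()
    {
        // Build a kernel with any chat completion connector; OpenAI is used here purely for illustration.
        var kernel = Kernel.CreateBuilder()
            .AddOpenAIChatCompletion(modelId: "gpt-4o-mini", apiKey: TestConfiguration.OpenAI.ApiKey)
            .Build();

        var chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();

        var chatHistory = new ChatHistory("You are a helpful assistant.");
        chatHistory.AddUserMessage("Explain streaming chat completion in one paragraph.");

        // The shared helper streams tokens to the output and appends the full
        // assistant message back onto the chat history.
        await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant);
    }
}
```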