.Net: Concept Samples Updates and Fixes #10746


Merged: 9 commits, Mar 4, 2025
1 change: 1 addition & 0 deletions dotnet/SK-dotnet.sln
@@ -461,6 +461,7 @@ EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProcessFramework.Aspire.SummaryAgent", "samples\Demos\ProcessFrameworkWithAspire\ProcessFramework.Aspire\ProcessFramework.Aspire.SummaryAgent\ProcessFramework.Aspire.SummaryAgent.csproj", "{37381352-4F10-427F-AB8A-51FEAB265201}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProcessFramework.Aspire.TranslatorAgent", "samples\Demos\ProcessFrameworkWithAspire\ProcessFramework.Aspire\ProcessFramework.Aspire.TranslatorAgent\ProcessFramework.Aspire.TranslatorAgent.csproj", "{DAD5FC6A-8CA0-43AC-87E1-032DFBD6B02A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Agents.Bedrock", "src\Agents\Bedrock\Agents.Bedrock.csproj", "{8C658E1E-83C8-4127-B8BF-27A638A45DDD}"
EndProject
Global
@@ -8,7 +8,11 @@

namespace ChatCompletion;

// The following example shows how to use Semantic Kernel with Azure AI Inference / Azure AI Studio
/// <summary>
/// These examples demonstrate different ways of using chat completion with Azure AI Foundry or GitHub Models.
/// Azure AI Foundry: https://ai.azure.com/explore/models
/// GitHub Models: https://github.com/marketplace?type=models
/// </summary>
public class AzureAIInference_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
[Fact]
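The method bodies of this sample are collapsed in the diff view. For orientation, here is a minimal sketch of the pattern these tests exercise, assuming the AddAzureAIInferenceChatCompletion builder extension from the Azure AI Inference connector and an AzureAIInference section in TestConfiguration (both are assumptions here; the model id is a placeholder):

// Minimal sketch, not part of the diff: chat completion against an
// Azure AI Foundry / GitHub Models endpoint via the Azure AI Inference connector.
var kernel = Kernel.CreateBuilder()
    .AddAzureAIInferenceChatCompletion(
        modelId: "Phi-3.5-MoE-instruct",                               // placeholder model id
        apiKey: TestConfiguration.AzureAIInference.ApiKey,             // assumed config section
        endpoint: new Uri(TestConfiguration.AzureAIInference.Endpoint))
    .Build();

var chatService = kernel.GetRequiredService<IChatCompletionService>();

var chatHistory = new ChatHistory("You are a helpful assistant.");
chatHistory.AddUserMessage("What can you help me with?");

// Non-streaming: a single complete assistant message comes back.
var reply = await chatService.GetChatMessageContentAsync(chatHistory);
Console.WriteLine(reply);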
@@ -9,7 +9,9 @@
namespace ChatCompletion;

/// <summary>
/// These examples demonstrate the ways different content types are streamed by OpenAI LLM via the chat completion service.
/// These examples demonstrate different ways of using streaming chat completion with Azure AI Foundry or GitHub Models.
/// Azure AI Foundry: https://ai.azure.com/explore/models
/// GitHub Models: https://github.com/marketplace?type=models
/// </summary>
public class AzureAIInference_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
{
@@ -120,36 +122,6 @@ private async Task StartStreamingChatAsync(IChatCompletionService chatCompletion
await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant);
}

/// <summary>
/// Streams the message output from the chat completion service.
/// </summary>
/// <param name="chatCompletionService">The chat completion service instance.</param>
/// <param name="chatHistory">The chat history instance.</param>
/// <param name="authorRole">The author role.</param>
private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole)
{
bool roleWritten = false;
string fullMessage = string.Empty;

await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory))
{
if (!roleWritten && chatUpdate.Role.HasValue)
{
Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}");
roleWritten = true;
}

if (chatUpdate.Content is { Length: > 0 })
{
fullMessage += chatUpdate.Content;
Console.Write(chatUpdate.Content);
}
}

Console.WriteLine("\n------------------------");
chatHistory.AddMessage(authorRole, fullMessage);
}

/// <summary>
/// Outputs the chat history by streaming the message output from the kernel.
/// </summary>
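The kernel-streaming method referenced above is also collapsed. A minimal sketch of streaming output through the kernel, using the same placeholder configuration as the sketch above:

// Minimal sketch, not part of the diff: stream a prompt through the kernel
// and print each update as it arrives instead of waiting for the full reply.
await foreach (var update in kernel.InvokePromptStreamingAsync("Write a haiku about streaming APIs."))
{
    Console.Write(update);
}
Console.WriteLine();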
@@ -8,7 +8,9 @@

namespace ChatCompletion;

// The following example shows how to use Semantic Kernel with Azure OpenAI API
/// <summary>
/// These examples demonstrate different ways of using chat completion with Azure OpenAI API.
/// </summary>
public class AzureOpenAI_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
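The sample bodies are collapsed here as well. The core pattern, assuming the standard AddAzureOpenAIChatCompletion builder extension and the TestConfiguration values the samples already rely on:

// Minimal sketch, not part of the diff: chat completion against Azure OpenAI
// using chat prompt syntax, where roles are expressed as message tags.
var kernel = Kernel.CreateBuilder()
    .AddAzureOpenAIChatCompletion(
        deploymentName: TestConfiguration.AzureOpenAI.ChatDeploymentName,
        endpoint: TestConfiguration.AzureOpenAI.Endpoint,
        apiKey: TestConfiguration.AzureOpenAI.ApiKey)
    .Build();

var reply = await kernel.InvokePromptAsync(
    """
    <message role="system">You are a librarian, expert about books.</message>
    <message role="user">Hi, I'm looking for book suggestions.</message>
    """);
Console.WriteLine(reply);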
@@ -8,7 +8,7 @@
namespace ChatCompletion;

/// <summary>
/// These examples demonstrate the ways different content types are streamed by Azure OpenAI via the chat completion service.
/// These examples demonstrate different ways of using streaming chat completion with Azure OpenAI API.
/// </summary>
public class AzureOpenAI_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
{
@@ -128,28 +128,4 @@ private async Task StartStreamingChatAsync(IChatCompletionService chatCompletion
// Second assistant message
await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant);
}

private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole)
{
bool roleWritten = false;
string fullMessage = string.Empty;

await foreach (var chatUpdate in chatCompletionService.GetStreamingChatMessageContentsAsync(chatHistory))
{
if (!roleWritten && chatUpdate.Role.HasValue)
{
Console.Write($"{chatUpdate.Role.Value}: {chatUpdate.Content}");
roleWritten = true;
}

if (chatUpdate.Content is { Length: > 0 })
{
fullMessage += chatUpdate.Content;
Console.Write(chatUpdate.Content);
}
}

Console.WriteLine("\n------------------------");
chatHistory.AddMessage(authorRole, fullMessage);
}
}
@@ -8,8 +8,10 @@

namespace ChatCompletion;

// The following example shows how to use Semantic Kernel with Azure OpenAI API
public class AzureOpenAI_ChatCompletion_WithReasoning(ITestOutputHelper output) : BaseTest(output)
/// <summary>
/// These examples demonstrate different ways of using chat completion reasoning models with Azure OpenAI API.
/// </summary>
public class AzureOpenAI_ChatCompletionWithReasoning(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// Sample showing how to use <see cref="Kernel"/> with chat completion and chat prompt syntax.
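The renamed reasoning sample's body is collapsed too. A sketch of the idea, with two explicit assumptions: the deployment name is a placeholder for a reasoning-capable model (for example o1), and the ReasoningEffort property on AzureOpenAIPromptExecutionSettings is assumed rather than confirmed by this diff:

// Minimal sketch, not part of the diff: call a reasoning-model deployment.
// Assumption: AzureOpenAIPromptExecutionSettings exposes ReasoningEffort
// accepting "low" / "medium" / "high".
var kernel = Kernel.CreateBuilder()
    .AddAzureOpenAIChatCompletion(
        deploymentName: "o1",                                  // placeholder reasoning deployment
        endpoint: TestConfiguration.AzureOpenAI.Endpoint,
        apiKey: TestConfiguration.AzureOpenAI.ApiKey)
    .Build();

var settings = new AzureOpenAIPromptExecutionSettings { ReasoningEffort = "low" }; // assumed property
var result = await kernel.InvokePromptAsync(
    "Outline a three-step plan for learning chess.",
    new KernelArguments(settings));
Console.WriteLine(result);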
@@ -5,27 +5,34 @@
using Azure.AI.OpenAI;
using Microsoft.SemanticKernel;

#pragma warning disable CA5399 // HttpClient is created without enabling CheckCertificateRevocationList

namespace ChatCompletion;

/// <summary>
/// This example shows a way of using a custom HttpClient and HttpClientHandler with the Azure OpenAI connector
/// to capture the request URI and headers for each request.
/// </summary>
public sealed class AzureOpenAI_CustomClient(ITestOutputHelper output) : BaseTest(output)
{
[Fact]
public async Task RunAsync()
public async Task UsingCustomHttpClientWithAzureOpenAI()
{
Console.WriteLine("======== Using a custom AzureOpenAI client ========");

Assert.NotNull(TestConfiguration.AzureOpenAI.Endpoint);
Assert.NotNull(TestConfiguration.AzureOpenAI.ChatDeploymentName);
Assert.NotNull(TestConfiguration.AzureOpenAI.ApiKey);

Console.WriteLine($"======== Azure Open AI - {nameof(UsingCustomHttpClientWithAzureOpenAI)} ========");

// Create an HttpClient and include your custom header(s)
var httpClient = new HttpClient();
httpClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value");
using var myCustomHttpHandler = new MyCustomClientHttpHandler(Output);
using var myCustomClient = new HttpClient(handler: myCustomHttpHandler);
myCustomClient.DefaultRequestHeaders.Add("My-Custom-Header", "My Custom Value");

// Configure AzureOpenAIClient to use the customized HttpClient
var clientOptions = new AzureOpenAIClientOptions
{
Transport = new HttpClientPipelineTransport(httpClient),
Transport = new HttpClientPipelineTransport(myCustomClient),
NetworkTimeout = TimeSpan.FromSeconds(30),
RetryPolicy = new ClientRetryPolicy()
};
@@ -48,6 +55,27 @@ public async Task RunAsync()
);
Console.WriteLine(result.GetValue<string>());

httpClient.Dispose();
myCustomClient.Dispose();
}

/// <summary>
/// Normally you would use a custom HttpClientHandler to add custom logic to your custom HttpClient.
/// This handler uses the ITestOutputHelper to write the requested URI and headers to the test output.
/// </summary>
/// <param name="output">The <see cref="ITestOutputHelper"/> used to write the request details to the test output.</param>
private sealed class MyCustomClientHttpHandler(ITestOutputHelper output) : HttpClientHandler
{
protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
{
output.WriteLine($"Requested URI: {request.RequestUri}");

request.Headers.Where(h => h.Key != "Authorization")
.ToList()
.ForEach(h => output.WriteLine($"{h.Key}: {string.Join(", ", h.Value)}"));
output.WriteLine("--------------------------------");

// Add custom logic here
return await base.SendAsync(request, cancellationToken);
}
}
}
@@ -0,0 +1,97 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.HuggingFace;

namespace ChatCompletion;

/// <summary>
/// This example shows a way of using the Hugging Face connector with the HuggingFace Text Generation Inference (TGI) API.
/// Follow the steps in <see href="https://huggingface.co/docs/text-generation-inference/main/en/quicktour"/> to set up a local HuggingFace Text Generation Inference HTTP server.
/// <list type="number">
/// <item>Install HuggingFace TGI via docker</item>
/// <item><c>docker run -d --gpus all --shm-size 1g -p 8080:80 -v "c:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:latest --model-id teknium/OpenHermes-2.5-Mistral-7B</c></item>
/// <item>Run the examples</item>
/// </list>
/// </summary>
public class HuggingFace_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// This example shows how to set up HuggingFace for use with the <see cref="Kernel"/> InvokeAsync (Non-Streaming).
/// </summary>
[Fact]
public async Task UsingKernelNonStreamingWithHuggingFace()
{
Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingKernelNonStreamingWithHuggingFace)} ========");

var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080)
var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model.

var kernel = Kernel.CreateBuilder()
.AddHuggingFaceChatCompletion(
model: modelId,
apiKey: null,
endpoint: endpoint)
.Build();

var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise.
Sign the mail as AI Assistant.

Text: ```{{$input}}```";

var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new HuggingFacePromptExecutionSettings
{
TopP = 0.5f,
MaxTokens = 1000,
});

var response = await kernel.InvokeAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." });
Console.WriteLine(response);
}

/// <summary>
/// Sample showing how to use <see cref="IChatCompletionService"/> directly with a <see cref="ChatHistory"/>.
/// </summary>
[Fact]
public async Task UsingServiceNonStreamingWithHuggingFace()
{
Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingServiceNonStreamingWithHuggingFace)} ========");

// HuggingFace local HTTP server endpoint
var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080)
var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model.

Kernel kernel = Kernel.CreateBuilder()
.AddHuggingFaceChatCompletion(
model: modelId,
endpoint: endpoint)
.Build();

var chatService = kernel.GetRequiredService<IChatCompletionService>();

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
OutputLastMessage(chatHistory);

// First assistant message
var reply = await chatService.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
OutputLastMessage(chatHistory);

// Second user message
chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
OutputLastMessage(chatHistory);

// Second assistant message
reply = await chatService.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
OutputLastMessage(chatHistory);
}
}
@@ -0,0 +1,95 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.HuggingFace;

namespace ChatCompletion;

/// <summary>
/// This example shows a way of using the Hugging Face connector with the HuggingFace Text Generation Inference (TGI) API.
/// Follow the steps in <see href="https://huggingface.co/docs/text-generation-inference/main/en/quicktour"/> to set up a local HuggingFace Text Generation Inference HTTP server.
/// <list type="number">
/// <item>Install HuggingFace TGI via docker</item>
/// <item><c>docker run -d --gpus all --shm-size 1g -p 8080:80 -v "c:\temp\huggingface:/data" ghcr.io/huggingface/text-generation-inference:latest --model-id teknium/OpenHermes-2.5-Mistral-7B</c></item>
/// <item>Run the examples</item>
/// </list>
/// </summary>
public class HuggingFace_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// Sample showing how to use <see cref="IChatCompletionService"/> directly with a <see cref="ChatHistory"/>.
/// </summary>
[Fact]
public async Task UsingServiceStreamingWithHuggingFace()
{
Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingServiceStreamingWithHuggingFace)} ========");

// HuggingFace local HTTP server endpoint
var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080)
var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model.

Kernel kernel = Kernel.CreateBuilder()
.AddHuggingFaceChatCompletion(
model: modelId,
endpoint: endpoint)
.Build();

var chatService = kernel.GetRequiredService<IChatCompletionService>();

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");
OutputLastMessage(chatHistory);

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
OutputLastMessage(chatHistory);

// First assistant message
await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant);

// Second user message
chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?");
OutputLastMessage(chatHistory);

// Second assistant message
await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant);
}

/// <summary>
/// This example shows how to set up HuggingFace for use with the <see cref="Kernel"/> InvokeStreamingAsync (Streaming).
/// </summary>
[Fact]
public async Task UsingKernelStreamingWithHuggingFace()
{
Console.WriteLine($"======== HuggingFace - Chat Completion - {nameof(UsingKernelStreamingWithHuggingFace)} ========");

var endpoint = new Uri("http://localhost:8080"); // Update the endpoint if you chose a different port. (defaults to 8080)
var modelId = "teknium/OpenHermes-2.5-Mistral-7B"; // Update the modelId if you chose a different model.

var kernel = Kernel.CreateBuilder()
.AddHuggingFaceChatCompletion(
model: modelId,
apiKey: null,
endpoint: endpoint)
.Build();

var prompt = @"Rewrite the text between triple backticks into a business mail. Use a professional tone, be clear and concise.
Sign the mail as AI Assistant.

Text: ```{{$input}}```";

var mailFunction = kernel.CreateFunctionFromPrompt(prompt, new HuggingFacePromptExecutionSettings
{
TopP = 0.5f,
MaxTokens = 1000,
});

await foreach (var word in kernel.InvokeStreamingAsync(mailFunction, new() { ["input"] = "Tell David that I'm going to finish the business plan by the end of the week." }))
{
Console.WriteLine(word);
}
}
}