.Net: Add Ollama ChatClient Extensions + UT #12476

Merged · 4 commits · Jun 16, 2025
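For orientation before the diff: the updated samples replace the IChatCompletionService-based calls with the Microsoft.Extensions.AI IChatClient abstraction that this PR wires up for Ollama. Below is a minimal sketch of the non-streaming pattern, assuming the OllamaSharp and Microsoft.Extensions.AI packages the samples reference; the endpoint and model id are placeholder values.

```csharp
// Sketch only: mirrors the updated Ollama_ChatCompletion sample below.
// "http://localhost:11434" and "llama3.2" are placeholders.
using Microsoft.Extensions.AI;
using OllamaSharp;

using IChatClient ollamaClient = new OllamaApiClient(
    uriString: "http://localhost:11434",
    defaultModel: "llama3.2");

List<ChatMessage> chatHistory =
[
    new(ChatRole.System, "You are a librarian, expert about books"),
    new(ChatRole.User, "Hi, I'm looking for book suggestions"),
];

// GetResponseAsync replaces the previous GetChatMessageContentAsync call;
// the response messages are appended back onto the history.
ChatResponse reply = await ollamaClient.GetResponseAsync(chatHistory);
chatHistory.AddRange(reply.Messages);
Console.WriteLine(chatHistory.Last().Text);
```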
40 changes: 19 additions & 21 deletions dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletion.cs
@@ -1,10 +1,10 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Text;
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

@@ -17,39 +17,37 @@ public class Ollama_ChatCompletion(ITestOutputHelper output) : BaseTest(output)
/// Demonstrates how you can use the chat completion service directly.
/// </summary>
[Fact]
public async Task ServicePromptAsync()
public async Task UsingChatClientPromptAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine("======== Ollama - Chat Completion ========");

using var ollamaClient = new OllamaApiClient(
using IChatClient ollamaClient = new OllamaApiClient(
uriString: TestConfiguration.Ollama.Endpoint,
defaultModel: TestConfiguration.Ollama.ModelId);

var chatService = ollamaClient.AsChatCompletionService();

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");
List<ChatMessage> chatHistory = [new ChatMessage(ChatRole.System, "You are a librarian, expert about books")];

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
chatHistory.Add(new(ChatRole.User, "Hi, I'm looking for book suggestions"));
this.OutputLastMessage(chatHistory);

// First assistant message
var reply = await chatService.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
var reply = await ollamaClient.GetResponseAsync(chatHistory);
chatHistory.AddRange(reply.Messages);
this.OutputLastMessage(chatHistory);

// Second user message
chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
chatHistory.Add(new(ChatRole.User, "I love history and philosophy, I'd like to learn something new about Greece, any suggestion"));
this.OutputLastMessage(chatHistory);

// Second assistant message
reply = await chatService.GetChatMessageContentAsync(chatHistory);
chatHistory.Add(reply);
reply = await ollamaClient.GetResponseAsync(chatHistory);
chatHistory.AddRange(reply.Messages);
this.OutputLastMessage(chatHistory);
}

@@ -61,7 +59,7 @@ public async Task ServicePromptAsync()
/// may cause breaking changes in the code below.
/// </remarks>
[Fact]
public async Task ServicePromptWithInnerContentAsync()
public async Task UsingChatCompletionServicePromptWithInnerContentAsync()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

@@ -87,9 +85,9 @@ public async Task ServicePromptWithInnerContentAsync()

// Assistant message details
// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content is always a list of chunks.
var replyInnerContent = reply.InnerContent as ChatDoneResponseStream;
var ollamaSharpInnerContent = reply.InnerContent as OllamaSharp.Models.Chat.ChatDoneResponseStream;

OutputInnerContent(replyInnerContent!);
OutputOllamaSharpContent(ollamaSharpInnerContent!);
}

/// <summary>
@@ -106,7 +104,7 @@ public async Task ChatPromptAsync()
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
.AddOllamaChatClient(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
modelId: TestConfiguration.Ollama.ModelId)
.Build();
@@ -139,18 +137,18 @@ public async Task ChatPromptWithInnerContentAsync()
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
.AddOllamaChatClient(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint ?? "http://localhost:11434"),
modelId: TestConfiguration.Ollama.ModelId)
.Build();

var functionResult = await kernel.InvokePromptAsync(chatPrompt.ToString());

// OllamaSharp does not support non-streaming and always performs streaming calls; for this reason, the inner content of a non-streaming result is a list of the generated chunks.
var messageContent = functionResult.GetValue<ChatMessageContent>(); // Retrieves underlying chat message content from FunctionResult.
var replyInnerContent = messageContent!.InnerContent as ChatDoneResponseStream; // Retrieves inner content from ChatMessageContent.
var messageContent = functionResult.GetValue<ChatResponse>(); // Retrieves underlying chat message content from FunctionResult.
var ollamaSharpRawRepresentation = messageContent!.RawRepresentation as OllamaSharp.Models.Chat.ChatDoneResponseStream; // Retrieves inner content from ChatMessageContent.

OutputInnerContent(replyInnerContent!);
OutputOllamaSharpContent(ollamaSharpRawRepresentation!);
}

/// <summary>
@@ -161,7 +159,7 @@ public async Task ChatPromptWithInnerContentAsync()
/// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes
/// may cause breaking changes in the code below.
/// </remarks>
private void OutputInnerContent(ChatDoneResponseStream innerContent)
private void OutputOllamaSharpContent(OllamaSharp.Models.Chat.ChatDoneResponseStream innerContent)
{
Console.WriteLine($$"""
Model: {{innerContent.Model}}
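This first file also switches the kernel registration from AddOllamaChatCompletion to AddOllamaChatClient. A minimal sketch of that registration plus a templated chat prompt invocation, with placeholder endpoint and model id values:

```csharp
// Sketch of the AddOllamaChatClient registration used in the sample above.
// Endpoint and model id are placeholders.
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel;

var kernel = Kernel.CreateBuilder()
    .AddOllamaChatClient(
        endpoint: new Uri("http://localhost:11434"),
        modelId: "llama3.2")
    .Build();

// Templated chat prompt, as in ChatPromptAsync.
string chatPrompt = """
    <message role="system">You are a librarian, expert about books</message>
    <message role="user">Hi, I'm looking for book suggestions</message>
    """;

var functionResult = await kernel.InvokePromptAsync(chatPrompt);
var chatResponse = functionResult.GetValue<ChatResponse>(); // as in ChatPromptWithInnerContentAsync above
Console.WriteLine(chatResponse?.Text);
```

The second file in the diff applies the equivalent changes to the streaming sample.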
dotnet/samples/Concepts/ChatCompletion/Ollama_ChatCompletionStreaming.cs
@@ -1,10 +1,10 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Text;
using Microsoft.Extensions.AI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using OllamaSharp;
using OllamaSharp.Models.Chat;

namespace ChatCompletion;

@@ -14,14 +14,49 @@ namespace ChatCompletion;
public class Ollama_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(output)
{
/// <summary>
/// This example demonstrates chat completion streaming using Ollama.
/// This example demonstrates chat completion streaming using <see cref="IChatClient"/> directly.
/// </summary>
[Fact]
public async Task UsingServiceStreamingWithOllama()
public async Task UsingChatClientStreaming()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingServiceStreamingWithOllama)} ========");
Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingChatClientStreaming)} ========");

using IChatClient ollamaClient = new OllamaApiClient(
uriString: TestConfiguration.Ollama.Endpoint,
defaultModel: TestConfiguration.Ollama.ModelId);

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

List<ChatMessage> chatHistory = [new ChatMessage(ChatRole.System, "You are a librarian, expert about books")];
this.OutputLastMessage(chatHistory);

// First user message
chatHistory.Add(new(ChatRole.User, "Hi, I'm looking for book suggestions"));
this.OutputLastMessage(chatHistory);

// First assistant message
await StreamChatClientMessageOutputAsync(ollamaClient, chatHistory);

// Second user message
chatHistory.Add(new(Microsoft.Extensions.AI.ChatRole.User, "I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"));
this.OutputLastMessage(chatHistory);

// Second assistant message
await StreamChatClientMessageOutputAsync(ollamaClient, chatHistory);
}

/// <summary>
/// This example demonstrates chat completion streaming using <see cref="IChatCompletionService"/> directly.
/// </summary>
[Fact]
public async Task UsingChatCompletionServiceStreamingWithOllama()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingChatCompletionServiceStreamingWithOllama)} ========");

using var ollamaClient = new OllamaApiClient(
uriString: TestConfiguration.Ollama.Endpoint,
@@ -51,58 +86,56 @@ public async Task UsingServiceStreamingWithOllama()
}

/// <summary>
/// This example demonstrates retrieving underlying library information through chat completion streaming inner contents.
/// This example demonstrates retrieving underlying OllamaSharp library information through <see cref="IChatClient" /> streaming raw representation (breaking glass) approach.
/// </summary>
/// <remarks>
/// This is a breaking glass scenario and is more susceptible to break on newer versions of OllamaSharp library.
/// </remarks>
[Fact]
public async Task UsingServiceStreamingInnerContentsWithOllama()
public async Task UsingChatClientStreamingRawContentsWithOllama()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingServiceStreamingInnerContentsWithOllama)} ========");
Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingChatClientStreamingRawContentsWithOllama)} ========");

using var ollamaClient = new OllamaApiClient(
using IChatClient ollamaClient = new OllamaApiClient(
uriString: TestConfiguration.Ollama.Endpoint,
defaultModel: TestConfiguration.Ollama.ModelId);

var chatService = ollamaClient.AsChatCompletionService();

Console.WriteLine("Chat content:");
Console.WriteLine("------------------------");

var chatHistory = new ChatHistory("You are a librarian, expert about books");
List<ChatMessage> chatHistory = [new ChatMessage(ChatRole.System, "You are a librarian, expert about books")];
this.OutputLastMessage(chatHistory);

// First user message
chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
chatHistory.Add(new(ChatRole.User, "Hi, I'm looking for book suggestions"));
this.OutputLastMessage(chatHistory);

await foreach (var chatUpdate in chatService.GetStreamingChatMessageContentsAsync(chatHistory))
await foreach (var chatUpdate in ollamaClient.GetStreamingResponseAsync(chatHistory))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
var rawRepresentation = chatUpdate.RawRepresentation as OllamaSharp.Models.Chat.ChatResponseStream;
OutputOllamaSharpContent(rawRepresentation!);
}
}

/// <summary>
/// Demonstrates how you can template a chat history call while using the <see cref="Kernel"/> for invocation.
/// </summary>
[Fact]
public async Task UsingKernelChatPromptStreamingWithOllama()
public async Task UsingKernelChatPromptStreaming()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreamingWithOllama)} ========");
Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreaming)} ========");

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
.AddOllamaChatClient(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId)
.Build();
@@ -124,19 +157,19 @@ public async Task UsingKernelChatPromptStreamingWithOllama()
/// This is a breaking glass scenario and is more susceptible to break on newer versions of OllamaSharp library.
/// </remarks>
[Fact]
public async Task UsingKernelChatPromptStreamingInnerContentsWithOllama()
public async Task UsingKernelChatPromptStreamingRawRepresentation()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreamingInnerContentsWithOllama)} ========");
Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingKernelChatPromptStreamingRawRepresentation)} ========");

StringBuilder chatPrompt = new("""
<message role="system">You are a librarian, expert about books</message>
<message role="user">Hi, I'm looking for book suggestions</message>
""");

var kernel = Kernel.CreateBuilder()
.AddOllamaChatCompletion(
.AddOllamaChatClient(
endpoint: new Uri(TestConfiguration.Ollama.Endpoint),
modelId: TestConfiguration.Ollama.ModelId)
.Build();
@@ -148,8 +181,8 @@ public async Task UsingKernelChatPromptStreamingInnerContentsWithOllama()

await foreach (var chatUpdate in kernel.InvokePromptStreamingAsync<StreamingChatMessageContent>(chatPrompt.ToString()))
{
var innerContent = chatUpdate.InnerContent as ChatResponseStream;
OutputInnerContent(innerContent!);
var innerContent = chatUpdate.InnerContent as OllamaSharp.Models.Chat.ChatResponseStream;
OutputOllamaSharpContent(innerContent!);
}
}

@@ -159,11 +192,11 @@ public async Task UsingKernelChatPromptStreamingInnerContentsWithOllama()
/// and alternatively via the StreamingChatMessageContent.Items property.
/// </summary>
[Fact]
public async Task UsingStreamingTextFromChatCompletionWithOllama()
public async Task UsingStreamingTextFromChatCompletion()
{
Assert.NotNull(TestConfiguration.Ollama.ModelId);

Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingStreamingTextFromChatCompletionWithOllama)} ========");
Console.WriteLine($"======== Ollama - Chat Completion - {nameof(UsingStreamingTextFromChatCompletion)} ========");

using var ollamaClient = new OllamaApiClient(
uriString: TestConfiguration.Ollama.Endpoint,
@@ -212,6 +245,29 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
return fullMessage;
}

private async Task StreamChatClientMessageOutputAsync(IChatClient chatClient, List<ChatMessage> chatHistory)
{
bool roleWritten = false;
string fullMessage = string.Empty;
List<ChatResponseUpdate> chatUpdates = [];
await foreach (var chatUpdate in chatClient.GetStreamingResponseAsync(chatHistory))
{
chatUpdates.Add(chatUpdate);
if (!roleWritten && !string.IsNullOrEmpty(chatUpdate.Text))
{
Console.Write($"Assistant: {chatUpdate.Text}");
roleWritten = true;
}
else if (!string.IsNullOrEmpty(chatUpdate.Text))
{
Console.Write(chatUpdate.Text);
}
}

Console.WriteLine("\n------------------------");
chatHistory.AddRange(chatUpdates.ToChatResponse().Messages);
}

/// <summary>
/// Retrieve extra information from each streaming chunk response.
/// </summary>
@@ -220,7 +276,7 @@ private async Task<string> StreamMessageOutputFromKernelAsync(Kernel kernel, str
/// This is a breaking glass scenario, any attempt on running with different versions of OllamaSharp library that introduces breaking changes
/// may cause breaking changes in the code below.
/// </remarks>
private void OutputInnerContent(ChatResponseStream streamChunk)
private void OutputOllamaSharpContent(OllamaSharp.Models.Chat.ChatResponseStream streamChunk)
{
Console.WriteLine($$"""
Model: {{streamChunk.Model}}
Expand All @@ -230,8 +286,8 @@ private void OutputInnerContent(ChatResponseStream streamChunk)
Done: {{streamChunk.Done}}
""");

/// The last message in the chunk is a <see cref="ChatDoneResponseStream"/> type with additional metadata.
if (streamChunk is ChatDoneResponseStream doneStream)
/// The last message in the chunk is a <see cref="OllamaSharp.Models.Chat.ChatDoneResponseStream"/> type with additional metadata.
if (streamChunk is OllamaSharp.Models.Chat.ChatDoneResponseStream doneStream)
{
Console.WriteLine($$"""
Done Reason: {{doneStream.DoneReason}}
@@ -245,4 +301,10 @@ private void OutputInnerContent(ChatResponseStream streamChunk)
}
Console.WriteLine("------------------------");
}

private void OutputLastMessage(List<ChatMessage> chatHistory)
{
var message = chatHistory.Last();
Console.WriteLine($"{message.Role}: {message.Text}");
}
}
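For completeness, here is a minimal stand-alone sketch of the streaming pattern the StreamChatClientMessageOutputAsync helper above implements: stream the updates, print the text as it arrives, then fold the accumulated updates back into the chat history with ToChatResponse(). The endpoint and model id are placeholders.

```csharp
// Sketch only: condensed version of the streaming flow in the sample above.
using Microsoft.Extensions.AI;
using OllamaSharp;

using IChatClient ollamaClient = new OllamaApiClient(
    uriString: "http://localhost:11434",
    defaultModel: "llama3.2");

List<ChatMessage> chatHistory = [new(ChatRole.User, "Recommend a book about ancient Greece")];
List<ChatResponseUpdate> chatUpdates = [];

await foreach (var update in ollamaClient.GetStreamingResponseAsync(chatHistory))
{
    chatUpdates.Add(update);
    Console.Write(update.Text); // text arrives incrementally, chunk by chunk
}

// Convert the accumulated updates into a ChatResponse and append its messages to the history.
chatHistory.AddRange(chatUpdates.ToChatResponse().Messages);
```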