microsoft · Cobra86 · Jun 18, 2025 · Jun 18, 2025 · Jun 20, 2025 · Jun 20, 2025
@@ -1809,6 +1809,343 @@ public async Task GetStreamingChatMessageContentsWithFunctionCallAndEmptyArgumen
         Assert.Equal(1, functionCallCount);
     }
 
+    // Four-byte stand-in for an audio payload; its Base64 form is "AQIDBA==".
+    private static readonly byte[] s_sampleAudioBytes = [0x01, 0x02, 0x03, 0x04];
+
+    /// <summary>
+    /// Verifies that a user message made of a text item and an audio item is serialized as a
+    /// "text" content part followed by an "input_audio" part holding the Base64 data and the "mp3" format.
+    /// </summary>
+    [Fact]
+    public async Task ItSendsAudioContentCorrectlyAsync()
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json"))
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        var chatHistory = new ChatHistory();
+        chatHistory.AddUserMessage([
+            new TextContent("What's in this audio?"),
+            new AudioContent(s_sampleAudioBytes, "audio/mp3")
+        ]);
+
+        // Act
+        await service.GetChatMessageContentsAsync(chatHistory);
+
+        // Assert
+        // Null-check the captured bytes before decoding them: Encoding.GetString never returns null,
+        // so asserting on the decoded string could not detect a missing request body.
+        var requestBytes = this._messageHandlerStub.RequestContents[0];
+        Assert.NotNull(requestBytes);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));
+
+        var messages = optionsJson.GetProperty("messages");
+        Assert.Equal(1, messages.GetArrayLength());
+
+        var contentItems = messages[0].GetProperty("content");
+        Assert.Equal(2, contentItems.GetArrayLength());
+
+        // First part: the text prompt
+        Assert.Equal("text", contentItems[0].GetProperty("type").GetString());
+        Assert.Equal("What's in this audio?", contentItems[0].GetProperty("text").GetString());
+
+        // Second part: the audio payload
+        Assert.Equal("input_audio", contentItems[1].GetProperty("type").GetString());
+        Assert.True(contentItems[1].TryGetProperty("input_audio", out var audioData));
+        Assert.Equal(JsonValueKind.Object, audioData.ValueKind);
+
+        Assert.True(audioData.TryGetProperty("format", out var formatProperty));
+        Assert.Equal("mp3", formatProperty.GetString());
+
+        Assert.True(audioData.TryGetProperty("data", out var dataProperty));
+        var base64Audio = dataProperty.GetString();
+        Assert.NotNull(base64Audio);
+        Assert.Equal(Convert.ToBase64String(s_sampleAudioBytes), base64Audio);
+    }
+
+    /// <summary>
+    /// Verifies that a completion whose assistant message carries both text and Base64 audio
+    /// is surfaced as a <see cref="TextContent"/> item followed by an <see cref="AudioContent"/> item.
+    /// </summary>
+    [Fact]
+    public async Task ItHandlesAudioContentInResponseAsync()
+    {
+        // Arrange
+        var executionSettings = new AzureOpenAIPromptExecutionSettings
+        {
+            Modalities = ChatResponseModalities.Text | ChatResponseModalities.Audio,
+            Audio = new ChatAudioOptions(ChatOutputAudioVoice.Alloy, ChatOutputAudioFormat.Mp3)
+        };
+
+        // Canned completion: "AQIDBA==" is the Base64 form of s_sampleAudioBytes
+        var completionPayload = """
+    {
+        "model": "gpt-4o",
+        "choices": [
+            {
+                "message": {
+                    "role": "assistant",
+                    "content": "This is the text response.",
+                    "audio": {
+                        "data": "AQIDBA=="
+                    }
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "usage": {
+            "prompt_tokens": 10,
+            "completion_tokens": 20,
+            "total_tokens": 30
+        }
+    }
+    """;
+
+        using var cannedResponse = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(completionPayload)
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(cannedResponse);
+
+        var sut = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+
+        // Act
+        var message = await sut.GetChatMessageContentAsync(new ChatHistory("test"), executionSettings);
+
+        // Assert
+        Assert.NotNull(message);
+        Assert.Equal("This is the text response.", message.Content);
+        Assert.Equal(2, message.Items.Count);
+
+        var textItem = message.Items[0] as TextContent;
+        Assert.NotNull(textItem);
+        Assert.Equal("This is the text response.", textItem.Text);
+
+        var audioItem = message.Items[1] as AudioContent;
+        Assert.NotNull(audioItem);
+        Assert.NotNull(audioItem.Data);
+
+        // Sequence equality covers both the length (4) and every individual byte
+        Assert.Equal(s_sampleAudioBytes, audioItem.Data.Value.ToArray());
+    }
+
+    /// <summary>
+    /// Verifies that the id, transcript and expiry returned alongside the audio payload
+    /// are exposed through the metadata of the resulting <see cref="AudioContent"/> item.
+    /// </summary>
+    [Fact]
+    public async Task ItHandlesAudioContentWithMetadataInResponseAsync()
+    {
+        // Arrange
+        var executionSettings = new AzureOpenAIPromptExecutionSettings
+        {
+            Modalities = ChatResponseModalities.Text | ChatResponseModalities.Audio,
+            Audio = new ChatAudioOptions(ChatOutputAudioVoice.Alloy, ChatOutputAudioFormat.Mp3)
+        };
+
+        // Canned completion whose audio object also carries id, transcript and expires_at
+        var completionPayload = """
+    {
+        "model": "gpt-4o",
+        "choices": [
+            {
+                "message": {
+                    "role": "assistant",
+                    "content": "This is the text response.",
+                    "audio": {
+                        "id": "audio-123456",
+                        "data": "AQIDBA==",
+                        "transcript": "This is the audio transcript.",
+                        "expires_at": 1698765432
+                    }
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "usage": {
+            "prompt_tokens": 10,
+            "completion_tokens": 20,
+            "total_tokens": 30
+        }
+    }
+    """;
+
+        using var cannedResponse = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(completionPayload)
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(cannedResponse);
+
+        var sut = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+
+        // Act
+        var message = await sut.GetChatMessageContentAsync(new ChatHistory("test"), executionSettings);
+
+        // Assert
+        Assert.NotNull(message);
+        Assert.Equal("This is the text response.", message.Content);
+        Assert.Equal(2, message.Items.Count);
+
+        var textItem = message.Items[0] as TextContent;
+        Assert.NotNull(textItem);
+        Assert.Equal("This is the text response.", textItem.Text);
+
+        var audioItem = message.Items[1] as AudioContent;
+        Assert.NotNull(audioItem);
+        Assert.NotNull(audioItem.Data);
+
+        // Sequence equality covers both the length (4) and every individual byte
+        Assert.Equal(s_sampleAudioBytes, audioItem.Data.Value.ToArray());
+
+        // Audio metadata
+        var metadata = audioItem.Metadata;
+        Assert.NotNull(metadata);
+        Assert.Equal("audio-123456", metadata["Id"]);
+        Assert.Equal("This is the audio transcript.", metadata["Transcript"]);
+
+        // ExpiresAt is only checked for presence: it is surfaced as a date/time value rather than
+        // the raw Unix timestamp, so it is not compared against 1698765432 directly.
+        Assert.NotNull(metadata["ExpiresAt"]);
+    }
+
+    /// <summary>
+    /// Verifies that each representation of the response modalities supplied through
+    /// <c>AzureOpenAIPromptExecutionSettings.Modalities</c> produces the expected "modalities" JSON in the request.
+    /// </summary>
+    /// <param name="responseModalities">The modalities value assigned to the execution settings.</param>
+    /// <param name="expectedJson">The raw JSON expected for the request's "modalities" property.</param>
+    [Theory]
+    [MemberData(nameof(ResponseModalitiesData))]
+    public async Task ItCreatesCorrectResponseModalitiesAsync(object responseModalities, string expectedJson)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json"))
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            Modalities = responseModalities
+        };
+
+        // Act
+        await service.GetChatMessageContentsAsync(new ChatHistory("test"), settings);
+
+        // Assert
+        // Null-check the captured bytes before decoding them: Encoding.GetString never returns null,
+        // so asserting on the decoded string could not detect a missing request body.
+        var requestBytes = this._messageHandlerStub.RequestContents[0];
+        Assert.NotNull(requestBytes);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));
+        Assert.True(optionsJson.TryGetProperty("modalities", out var property));
+        Assert.Equal(expectedJson, property.GetRawText());
+    }
+
+    /// <summary>
+    /// Streaming counterpart of the response-modalities check: verifies that the "modalities" JSON
+    /// sent with a streaming request matches the expected value for each input representation.
+    /// </summary>
+    /// <param name="responseModalities">The modalities value assigned to the execution settings.</param>
+    /// <param name="expectedJson">The raw JSON expected for the request's "modalities" property.</param>
+    [Theory]
+    [MemberData(nameof(ResponseModalitiesData))]
+    public async Task ItCreatesCorrectResponseModalitiesStreamingAsync(object responseModalities, string expectedJson)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+        using var stream = new MemoryStream(Encoding.UTF8.GetBytes(AzureOpenAITestHelper.GetTestResponse("chat_completion_streaming_test_response.txt")));
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StreamContent(stream)
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            Modalities = responseModalities
+        };
+
+        // Act
+        // Advancing once is enough to send the request; the enumerator is disposed when the test ends
+        // so the partially consumed stream is released.
+        await using var enumerator = service.GetStreamingChatMessageContentsAsync(new ChatHistory("test"), settings).GetAsyncEnumerator();
+        await enumerator.MoveNextAsync();
+
+        // Assert
+        // Null-check the captured bytes before decoding them: Encoding.GetString never returns null,
+        // so asserting on the decoded string could not detect a missing request body.
+        var requestBytes = this._messageHandlerStub.RequestContents[0];
+        Assert.NotNull(requestBytes);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));
+        Assert.True(optionsJson.TryGetProperty("modalities", out var property));
+        Assert.Equal(expectedJson, property.GetRawText());
+    }
+
+    /// <summary>
+    /// Verifies that each representation of the audio options supplied through
+    /// <c>AzureOpenAIPromptExecutionSettings.Audio</c> produces the expected "audio" JSON object in the request.
+    /// </summary>
+    /// <param name="audioOptions">The audio options value assigned to the execution settings.</param>
+    /// <param name="expectedJson">The raw JSON expected for the request's "audio" property.</param>
+    [Theory]
+    [MemberData(nameof(AudioOptionsData))]
+    public async Task ItCreatesCorrectAudioOptionsAsync(object audioOptions, string expectedJson)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StringContent(AzureOpenAITestHelper.GetTestResponse("chat_completion_test_response.json"))
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            Audio = audioOptions
+        };
+
+        // Act
+        await service.GetChatMessageContentsAsync(new ChatHistory("test"), settings);
+
+        // Assert
+        // Null-check the captured bytes before decoding them: Encoding.GetString never returns null,
+        // so asserting on the decoded string could not detect a missing request body.
+        var requestBytes = this._messageHandlerStub.RequestContents[0];
+        Assert.NotNull(requestBytes);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));
+        Assert.True(optionsJson.TryGetProperty("audio", out var property));
+        Assert.Equal(JsonValueKind.Object, property.ValueKind);
+        Assert.Equal(expectedJson, property.GetRawText());
+    }
+
+    /// <summary>
+    /// Streaming counterpart of the audio-options check: verifies that the "audio" JSON object
+    /// sent with a streaming request matches the expected value for each input representation.
+    /// </summary>
+    /// <param name="audioOptions">The audio options value assigned to the execution settings.</param>
+    /// <param name="expectedJson">The raw JSON expected for the request's "audio" property.</param>
+    [Theory]
+    [MemberData(nameof(AudioOptionsData))]
+    public async Task ItCreatesCorrectAudioOptionsStreamingAsync(object audioOptions, string expectedJson)
+    {
+        // Arrange
+        var service = new AzureOpenAIChatCompletionService("deployment", "https://endpoint", "api-key", "model-id", this._httpClient);
+        using var stream = new MemoryStream(Encoding.UTF8.GetBytes(AzureOpenAITestHelper.GetTestResponse("chat_completion_streaming_test_response.txt")));
+
+        using var responseMessage = new HttpResponseMessage(HttpStatusCode.OK)
+        {
+            Content = new StreamContent(stream)
+        };
+        this._messageHandlerStub.ResponsesToReturn.Add(responseMessage);
+
+        var settings = new AzureOpenAIPromptExecutionSettings
+        {
+            Audio = audioOptions
+        };
+
+        // Act
+        // Advancing once is enough to send the request; the enumerator is disposed when the test ends
+        // so the partially consumed stream is released.
+        await using var enumerator = service.GetStreamingChatMessageContentsAsync(new ChatHistory("test"), settings).GetAsyncEnumerator();
+        await enumerator.MoveNextAsync();
+
+        // Assert
+        // Null-check the captured bytes before decoding them: Encoding.GetString never returns null,
+        // so asserting on the decoded string could not detect a missing request body.
+        var requestBytes = this._messageHandlerStub.RequestContents[0];
+        Assert.NotNull(requestBytes);
+        var optionsJson = JsonSerializer.Deserialize<JsonElement>(Encoding.UTF8.GetString(requestBytes));
+        Assert.True(optionsJson.TryGetProperty("audio", out var property));
+        Assert.Equal(JsonValueKind.Object, property.ValueKind);
+        Assert.Equal(expectedJson, property.GetRawText());
+    }
+
+    // Theory data consumed by the response-modality and audio-option tests.
+
+    /// <summary>
+    /// Pairs each form of response-modalities input exercised by the tests (flags enum, string array,
+    /// single string, <see cref="JsonElement"/>) with the raw JSON expected for the request's "modalities" property.
+    /// </summary>
+    public static TheoryData<object, string> ResponseModalitiesData
+    {
+        get
+        {
+            const string TextJson = "[\"text\"]";
+            const string AudioJson = "[\"audio\"]";
+            const string TextAndAudioJson = "[\"text\",\"audio\"]";
+
+            return new TheoryData<object, string>
+            {
+                // Flags enum values
+                { ChatResponseModalities.Text, TextJson },
+                { ChatResponseModalities.Audio, AudioJson },
+                { ChatResponseModalities.Text | ChatResponseModalities.Audio, TextAndAudioJson },
+
+                // String arrays
+                { new[] { "text" }, TextJson },
+                { new[] { "audio" }, AudioJson },
+                { new[] { "text", "audio" }, TextAndAudioJson },
+
+                // Single strings
+                { "Text", TextJson },
+                { "Audio", AudioJson },
+
+                // JSON elements
+                { JsonSerializer.Deserialize<JsonElement>("\"text\""), TextJson },
+                { JsonSerializer.Deserialize<JsonElement>("\"audio\""), AudioJson },
+                { JsonSerializer.Deserialize<JsonElement>("[\"text\", \"audio\"]"), TextAndAudioJson },
+            };
+        }
+    }
+
+    /// <summary>
+    /// Pairs each form of audio-options input exercised by the tests (typed options, <see cref="JsonElement"/>,
+    /// JSON string) with the raw JSON expected for the request's "audio" property.
+    /// </summary>
+    public static TheoryData<object, string> AudioOptionsData
+    {
+        get
+        {
+            const string AlloyMp3Json = "{\"voice\":\"alloy\",\"format\":\"mp3\"}";
+            const string EchoOpusJson = "{\"voice\":\"echo\",\"format\":\"opus\"}";
+
+            return new TheoryData<object, string>
+            {
+                // Typed options
+                { new ChatAudioOptions(ChatOutputAudioVoice.Alloy, ChatOutputAudioFormat.Mp3), AlloyMp3Json },
+                { new ChatAudioOptions(ChatOutputAudioVoice.Echo, ChatOutputAudioFormat.Opus), EchoOpusJson },
+
+                // JSON element
+                { JsonSerializer.Deserialize<JsonElement>("{\"voice\":\"alloy\",\"format\":\"mp3\"}"), AlloyMp3Json },
+
+                // JSON string
+                { "{\"voice\":\"echo\",\"format\":\"opus\"}", EchoOpusJson },
+            };
+        }
+    }
+
     public static TheoryData<string?, string?> Versions => new()
     {
         { "V2025_03_01_preview", "2025-03-01-preview" },