Added support for OpenAI Text to Audio (Speech API) streaming

Mohammed, Ahmed yousri salama (Canada) · Mohammed, Ahmed yousri salama (Canada) · commit 8039a5fdc727 · 2024-02-13T11:00:49.000-05:00
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechClient.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechClient.java
@@ -27,9 +27,9 @@
 import org.springframework.http.ResponseEntity;
 import org.springframework.retry.support.RetryTemplate;
 import org.springframework.util.Assert;
+import reactor.core.publisher.Flux;
 
 import java.time.Duration;
-import java.util.Objects;
 
 /**
  * {@link SpeechClient} implementation for {@literal OpenAI} backed by {@link OpenAiApi}.
@@ -38,7 +38,7 @@
  * @see SpeechClient
  * @see OpenAiApi
  */
-public class OpenAiSpeechClient implements SpeechClient {
+public class OpenAiSpeechClient implements SpeechClient, StreamingSpeechClient {
 
 	private final Logger logger = LoggerFactory.getLogger(getClass());
 
@@ -72,19 +72,7 @@ public SpeechResponse call(SpeechPrompt speechPrompt) {
 
 		return this.retryTemplate.execute(ctx -> {
 
-			String instructions = speechPrompt.getInstructions().get(0).getText();
-
-			OpenAiApi.SpeechRequest speechRequest = new OpenAiApi.SpeechRequest(instructions);
-
-			if (this.defaultOptions != null) {
-				speechRequest = ModelOptionsUtils.merge(this.defaultOptions, speechRequest,
-						OpenAiApi.SpeechRequest.class);
-			}
-
-			if (speechPrompt.getOptions() != null) {
-				speechRequest = ModelOptionsUtils.merge(toOpenAiSpeechOptions(speechPrompt.getOptions()), speechRequest,
-						OpenAiApi.SpeechRequest.class);
-			}
+			OpenAiApi.SpeechRequest speechRequest = createRequest(speechPrompt);
 
 			ResponseEntity<OpenAiApi.SpeechResponse> SpeechEntity = this.openAiApi
 				.textToSpeechEntityJson(speechRequest);
@@ -102,6 +90,23 @@ public SpeechResponse call(SpeechPrompt speechPrompt) {
 		});
 	}
 
+	/**
+	 * Builds the OpenAI speech request for a prompt. The text of the prompt's first
+	 * instruction becomes the request input; the client's default options (if any) are
+	 * merged in first, followed by the prompt's own options (if any).
+	 * @param speechPrompt the prompt supplying the instruction text and optional runtime
+	 * options
+	 * @return the merged request
+	 */
+	private OpenAiApi.SpeechRequest createRequest(SpeechPrompt speechPrompt) {
+		// Only the first instruction is used as the text to synthesize.
+		OpenAiApi.SpeechRequest request = new OpenAiApi.SpeechRequest(
+				speechPrompt.getInstructions().get(0).getText());
+
+		if (this.defaultOptions != null) {
+			request = ModelOptionsUtils.merge(this.defaultOptions, request, OpenAiApi.SpeechRequest.class);
+		}
+
+		if (speechPrompt.getOptions() == null) {
+			return request;
+		}
+
+		// Runtime options are merged after the defaults.
+		return ModelOptionsUtils.merge(toOpenAiSpeechOptions(speechPrompt.getOptions()), request,
+				OpenAiApi.SpeechRequest.class);
+	}
+
 	private Speech convertResponse(OpenAiApi.SpeechResponse speechResponse) {
 		return new Speech(speechResponse.audio());
 	}
@@ -142,4 +147,12 @@ private OpenAiSpeechOptions toOpenAiSpeechOptions(SpeechOptions runtimeSpeechOpt
 		return openAiSpeechOptionBuilder.build();
 	}
 
+	/**
+	 * Streams synthesized speech for the given prompt. Each element emitted by
+	 * {@link OpenAiApi#textToSpeechStreaming} is wrapped in its own
+	 * {@link SpeechResponse}, carrying that element's body as the {@link Speech} payload
+	 * and metadata extracted from the element's response headers.
+	 * @param prompt the prompt supplying the instruction text and optional options
+	 * @return a {@link Flux} emitting one {@link SpeechResponse} per upstream element
+	 */
+	@Override
+	public Flux<SpeechResponse> stream(SpeechPrompt prompt) {
+		OpenAiApi.SpeechRequest speechRequest = createRequest(prompt);
+		Flux<ResponseEntity<byte[]>> audioChunks = this.openAiApi.textToSpeechStreaming(speechRequest);
+
+		return audioChunks.map(chunk -> {
+			Speech speech = new Speech(chunk.getBody());
+			OpenAiSpeechResponseMetadata metadata = new OpenAiSpeechResponseMetadata(
+					OpenAiResponseHeaderExtractor.extractAiResponseHeaders(chunk));
+			return new SpeechResponse(speech, metadata);
+		});
+	}
 }
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
@@ -451,6 +451,19 @@ public ResponseEntity<SpeechResponse> textToSpeechEntityJson(OpenAiApi.SpeechReq
 		SpeechResponse speechResponse = new SpeechResponse(responseEntity.getBody());
 		return new ResponseEntity<>(speechResponse, headers, responseEntity.getStatusCode());
 	}
+
+	/**
+	 * Requests synthesized speech and streams the binary response body as it arrives.
+	 * Every emitted element carries one decoded {@code byte[]} chunk together with the
+	 * response status and headers.
+	 * <p>
+	 * {@code exchangeToFlux} does not raise an error for 4xx/5xx responses on its own, so
+	 * the status is checked explicitly: an error response is signalled as a
+	 * {@code WebClientResponseException} instead of having its (non-audio) error body
+	 * emitted as if it were speech data.
+	 * @param speechRequest the speech request to post to {@code /v1/audio/speech}
+	 * @return a {@link Flux} of response entities, one per received body chunk
+	 */
+	public Flux<ResponseEntity<byte[]>> textToSpeechStreaming(OpenAiApi.SpeechRequest speechRequest) {
+
+		return webClient.post()
+				.uri("/v1/audio/speech")
+				.body(Mono.just(speechRequest), SpeechRequest.class)
+				.accept(MediaType.APPLICATION_OCTET_STREAM)
+				.exchangeToFlux(clientResponse -> {
+					if (clientResponse.statusCode().isError()) {
+						// Surface API failures as errors rather than as audio chunks.
+						return clientResponse.createException()
+								.flatMapMany(ex -> Flux.<ResponseEntity<byte[]>>error(ex));
+					}
+					HttpHeaders headers = clientResponse.headers().asHttpHeaders();
+					// Propagate the real status instead of assuming 200 OK.
+					return clientResponse.bodyToFlux(byte[].class)
+							.map(bytes -> ResponseEntity.status(clientResponse.statusCode())
+									.headers(headers)
+									.body(bytes));
+				});
+	}
+
 	/**
 	 * Message comprising the conversation.
 	 *
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/speech/OpenAiSpeechClientIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/speech/OpenAiSpeechClientIT.java
@@ -9,6 +9,7 @@
 import org.springframework.ai.speech.SpeechPrompt;
 import org.springframework.ai.speech.SpeechResponse;
 import org.springframework.boot.test.context.SpringBootTest;
+import reactor.core.publisher.Flux;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -34,4 +35,20 @@ void speechTest() {
 
 	}
 
+	/**
+	 * Streams speech for a short prompt and verifies that at least one response is
+	 * emitted. The returned {@link Flux} is subscribed exactly once: every subscription
+	 * issues a new HTTP request, so blocking on it twice would call the API twice.
+	 */
+	@Test
+	void speechStreamingTest() {
+		SpeechOptions speechOptions = SpeechOptionsBuilder.builder()
+				.withVoice("shimmer")
+				.withSpeed(1.0f)
+				.withResponseFormat("mp3")
+				.withModel("tts-1-hd")
+				.build();
+		SpeechPrompt speechPrompt = new SpeechPrompt("Today is a wonderful day to build something people love!",
+				speechOptions);
+		Flux<SpeechResponse> response = streamingSpeechClient.stream(speechPrompt);
+		assertThat(response).isNotNull();
+
+		// collectList() never yields null, so assert on content rather than nullness.
+		java.util.List<SpeechResponse> responses = response.collectList().block();
+		assertThat(responses).isNotEmpty();
+	}
+
 }
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/testutils/AbstractIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/testutils/AbstractIT.java
@@ -15,6 +15,7 @@
 import org.springframework.ai.chat.messages.SystemMessage;
 import org.springframework.ai.image.ImageClient;
 import org.springframework.ai.speech.SpeechClient;
+import org.springframework.ai.speech.StreamingSpeechClient;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.Resource;
@@ -35,6 +36,9 @@ public abstract class AbstractIT {
 	@Autowired
 	protected SpeechClient openAiSpeechClient;
 
+	@Autowired
+	protected StreamingSpeechClient streamingSpeechClient;
+
 	@Autowired
 	protected StreamingChatClient openStreamingChatClient;
 
diff --git a/spring-ai-core/src/main/java/org/springframework/ai/speech/StreamingSpeechClient.java b/spring-ai-core/src/main/java/org/springframework/ai/speech/StreamingSpeechClient.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2023 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.speech;
+
+import org.springframework.ai.model.StreamingModelClient;
+import reactor.core.publisher.Flux;
+
+/**
+ * Client contract for speech models that deliver their output as a reactive stream:
+ * a {@link SpeechPrompt} goes in and a {@link Flux} of {@link SpeechResponse} comes
+ * out.
+ */
+@FunctionalInterface
+public interface StreamingSpeechClient extends StreamingModelClient<SpeechPrompt, SpeechResponse> {
+
+	/**
+	 * Streams the speech generated for the given prompt.
+	 * @param prompt the speech prompt to process
+	 * @return a {@link Flux} of {@link SpeechResponse} elements
+	 */
+	@Override
+	Flux<SpeechResponse> stream(SpeechPrompt prompt);
+
+}