Make the OpenAiAudioApi return ResponseEntity<value>

tzolov · tzolov · commit 54930af220e4 · 2024-03-03T10:49:09.000+01:00
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java
@@ -24,6 +24,7 @@
 
 import org.springframework.ai.openai.api.common.ApiUtils;
 import org.springframework.core.io.ByteArrayResource;
+import org.springframework.http.ResponseEntity;
 import org.springframework.util.Assert;
 import org.springframework.util.LinkedMultiValueMap;
 import org.springframework.util.MultiValueMap;
@@ -561,18 +562,19 @@ public record Segment(
 	/**
 	 * Request to generates audio from the input text.
 	 * @param requestBody The request body.
-	 * @return The audio file in bytes.
+	 * @return Response entity containing the audio binary.
 	 */
-	public byte[] createSpeech(SpeechRequest requestBody) {
-		return this.restClient.post().uri("/v1/audio/speech").body(requestBody).retrieve().body(byte[].class);
+	public ResponseEntity<byte[]> createSpeech(SpeechRequest requestBody) {
+		return this.restClient.post().uri("/v1/audio/speech").body(requestBody).retrieve().toEntity(byte[].class);
 	}
 
 	/**
 	 * Transcribes audio into the input language.
 	 * @param requestBody The request body.
-	 * @return The transcribed text.
+	 * @return Response entity containing the transcribed text in either json or text
+	 * format.
 	 */
-	public Object createTranscription(TranscriptionRequest requestBody) {
+	public ResponseEntity<?> createTranscription(TranscriptionRequest requestBody) {
 		return createTranscription(requestBody, requestBody.responseFormat().getResponseType());
 	}
 
@@ -582,9 +584,9 @@ public Object createTranscription(TranscriptionRequest requestBody) {
 	 * @param <T> The response type.
 	 * @param requestBody The request body.
 	 * @param responseType The response type class.
-	 * @return The transcribed text.
+	 * @return Response entity containing the transcribed text in the responseType format.
 	 */
-	public <T> T createTranscription(TranscriptionRequest requestBody, Class<T> responseType) {
+	public <T> ResponseEntity<T> createTranscription(TranscriptionRequest requestBody, Class<T> responseType) {
 
 		MultiValueMap<String, Object> multipartBody = new LinkedMultiValueMap<>();
 		multipartBody.add("file", new ByteArrayResource(requestBody.file()) {
@@ -604,15 +606,20 @@ public String getFilename() {
 			multipartBody.add("timestamp_granularities[]", requestBody.granularityType().getValue());
 		}
 
-		return this.restClient.post().uri("/v1/audio/transcriptions").body(multipartBody).retrieve().body(responseType);
+		return this.restClient.post()
+			.uri("/v1/audio/transcriptions")
+			.body(multipartBody)
+			.retrieve()
+			.toEntity(responseType);
 	}
 
 	/**
 	 * Translates audio into English.
 	 * @param requestBody The request body.
-	 * @return The transcribed text.
+	 * @return Response entity containing the translated text in either json or text
+	 * format.
 	 */
-	public Object createTranslation(TranslationRequest requestBody) {
+	public ResponseEntity<?> createTranslation(TranslationRequest requestBody) {
 		return createTranslation(requestBody, requestBody.responseFormat().getResponseType());
 	}
 
@@ -622,9 +629,9 @@ public Object createTranslation(TranslationRequest requestBody) {
 	 * @param <T> The response type.
 	 * @param requestBody The request body.
 	 * @param responseType The response type class.
-	 * @return The transcribed text.
+	 * @return Response entity containing the translated text in the responseType format.
 	 */
-	public <T> T createTranslation(TranslationRequest requestBody, Class<T> responseType) {
+	public <T> ResponseEntity<T> createTranslation(TranslationRequest requestBody, Class<T> responseType) {
 
 		MultiValueMap<String, Object> multipartBody = new LinkedMultiValueMap<>();
 		multipartBody.add("file", new ByteArrayResource(requestBody.file()) {
@@ -638,7 +645,11 @@ public String getFilename() {
 		multipartBody.add("response_format", requestBody.responseFormat().getValue());
 		multipartBody.add("temperature", requestBody.temperature());
 
-		return this.restClient.post().uri("/v1/audio/translations").body(multipartBody).retrieve().body(responseType);
+		return this.restClient.post()
+			.uri("/v1/audio/translations")
+			.body(multipartBody)
+			.retrieve()
+			.toEntity(responseType);
 	}
 
 }
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioApiIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioApiIT.java
@@ -28,6 +28,7 @@
 import org.springframework.ai.openai.api.OpenAiAudioApi.StructuredResponse;
 import org.springframework.ai.openai.api.OpenAiAudioApi.TranslationRequest;
 import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;
+import org.springframework.lang.NonNull;
 import org.springframework.ai.openai.api.OpenAiAudioApi.TtsModel;
 import org.springframework.ai.openai.api.OpenAiAudioApi.WhisperModel;
 import org.springframework.util.FileCopyUtils;
@@ -42,34 +43,42 @@ public class OpenAiAudioApiIT {
 
 	OpenAiAudioApi audioApi = new OpenAiAudioApi(System.getenv("OPENAI_API_KEY"));
 
+	@SuppressWarnings("null")
 	@Test
 	void speechTranscriptionAndTranslation() throws IOException {
 
-		byte[] speech = audioApi.createSpeech(SpeechRequest.builder()
-			.withModel(TtsModel.TTS_1_HD.getValue())
-			.withInput("Hello, my name is Chris and I love Spring A.I.")
-			.withVoice(Voice.ONYX)
-			.build());
+		byte[] speech = audioApi
+			.createSpeech(SpeechRequest.builder()
+				.withModel(TtsModel.TTS_1_HD.getValue())
+				.withInput("Hello, my name is Chris and I love Spring A.I.")
+				.withVoice(Voice.ONYX)
+				.build())
+			.getBody();
 
 		assertThat(speech).isNotEmpty();
 
 		FileCopyUtils.copy(speech, new File("target/speech.mp3"));
 
-		StructuredResponse translation = audioApi.createTranslation(
-				TranslationRequest.builder().withModel(WhisperModel.WHISPER_1.getValue()).withFile(speech).build(),
-				StructuredResponse.class);
+		StructuredResponse translation = audioApi
+			.createTranslation(
+					TranslationRequest.builder().withModel(WhisperModel.WHISPER_1.getValue()).withFile(speech).build(),
+					StructuredResponse.class)
+			.getBody();
 
 		assertThat(translation.text().replaceAll(",", "")).isEqualTo("Hello my name is Chris and I love Spring AI.");
 
 		StructuredResponse transcriptionEnglish = audioApi.createTranscription(
 				TranscriptionRequest.builder().withModel(WhisperModel.WHISPER_1.getValue()).withFile(speech).build(),
-				StructuredResponse.class);
+				StructuredResponse.class)
+			.getBody();
 
 		assertThat(transcriptionEnglish.text().replaceAll(",", ""))
 			.isEqualTo("Hello my name is Chris and I love Spring AI.");
 
-		StructuredResponse transcriptionDutch = audioApi.createTranscription(
-				TranscriptionRequest.builder().withFile(speech).withLanguage("nl").build(), StructuredResponse.class);
+		StructuredResponse transcriptionDutch = audioApi
+			.createTranscription(TranscriptionRequest.builder().withFile(speech).withLanguage("nl").build(),
+					StructuredResponse.class)
+			.getBody();
 
 		assertThat(transcriptionDutch.text()).isEqualTo("Hallo, mijn naam is Chris en ik hou van Spring AI.");
 	}