spring-projects
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechClient.java
Lines changed: 145 additions & 0 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechClient.java
Lines changed: 145 additions & 0 deletions
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechOptions.java
Lines changed: 171 additions & 0 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiSpeechOptions.java
Lines changed: 171 additions & 0 deletions
diff --git a/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
Lines changed: 63 additions & 0 deletions b/‎models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2023-2023 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.springframework.ai.openai;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.ai.chat.metadata.RateLimit;
+import org.springframework.ai.model.ModelOptionsUtils;
+import org.springframework.ai.openai.api.OpenAiApi;
+import org.springframework.ai.openai.api.OpenAiApi.OpenAiApiException;
+import org.springframework.ai.openai.metadata.OpenAiSpeechResponseMetadata;
+import org.springframework.ai.openai.metadata.support.OpenAiResponseHeaderExtractor;
+import org.springframework.ai.speech.*;
+import org.springframework.http.ResponseEntity;
+import org.springframework.retry.support.RetryTemplate;
+import org.springframework.util.Assert;
+
+import java.time.Duration;
+import java.util.Objects;
+
+/**
+ * {@link SpeechClient} implementation for {@literal OpenAI} backed by {@link OpenAiApi}.
+ *
+ * <p>
+ * A call takes the text of the first instruction of the prompt, wraps it in an
+ * {@link OpenAiApi.SpeechRequest}, merges in the client's default options followed by the
+ * prompt's own options (both through {@link ModelOptionsUtils#merge}) and submits the
+ * result with {@link OpenAiApi#textToSpeechEntityJson}. The HTTP call runs inside
+ * {@link #retryTemplate}.
+ *
+ * @author Ahmed Yousri
+ * @see SpeechClient
+ * @see OpenAiApi
+ */
+public class OpenAiSpeechClient implements SpeechClient {
+
+	private final Logger logger = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * Options merged into every request before the prompt's own options. May be replaced
+	 * through {@link #withDefaultOptions(OpenAiSpeechOptions)}; a {@code null} value skips
+	 * the merge.
+	 */
+	private OpenAiSpeechOptions defaultOptions = OpenAiSpeechOptions.builder()
+		.withModel("tts-1")
+		.withResponseFormat("mp3")
+		.withSpeed(1.0f)
+		.withVoice("alloy")
+		.build();
+
+	/**
+	 * Retry policy applied to the API call: up to 10 attempts on
+	 * {@link OpenAiApiException}, with exponential backoff starting at 2 seconds,
+	 * multiplier 5, capped at 3 minutes.
+	 */
+	public final RetryTemplate retryTemplate = RetryTemplate.builder()
+		.maxAttempts(10)
+		.retryOn(OpenAiApiException.class)
+		.exponentialBackoff(Duration.ofMillis(2000), 5, Duration.ofMillis(3 * 60000))
+		.build();
+
+	private final OpenAiApi openAiApi;
+
+	/**
+	 * Creates a client that delegates to the given low-level API.
+	 * @param openAiApi the API used to issue text-to-speech requests; must not be
+	 * {@code null}
+	 */
+	public OpenAiSpeechClient(OpenAiApi openAiApi) {
+		Assert.notNull(openAiApi, "OpenAiApi must not be null");
+		this.openAiApi = openAiApi;
+	}
+
+	/**
+	 * Replaces the default options merged into every request.
+	 * @param options the new defaults; {@code null} disables the default-options merge
+	 * @return this client, for chaining
+	 */
+	public OpenAiSpeechClient withDefaultOptions(OpenAiSpeechOptions options) {
+		this.defaultOptions = options;
+		return this;
+	}
+
+	/**
+	 * Converts the prompt to speech.
+	 * @param speechPrompt the prompt whose first instruction supplies the text and whose
+	 * options, when present, are merged into the request
+	 * @return the generated speech together with rate-limit metadata, or a response
+	 * wrapping {@link OpenAiApi.SpeechResponse#NULL} (without metadata) when the API
+	 * returned no audio
+	 */
+	@Override
+	public SpeechResponse call(SpeechPrompt speechPrompt) {
+
+		// The request depends only on the prompt and the defaults, so it is built once
+		// rather than on every retry attempt.
+		OpenAiApi.SpeechRequest speechRequest = createRequest(speechPrompt);
+
+		return this.retryTemplate.execute(ctx -> {
+
+			ResponseEntity<OpenAiApi.SpeechResponse> speechEntity = this.openAiApi
+				.textToSpeechEntityJson(speechRequest);
+			OpenAiApi.SpeechResponse speech = speechEntity.getBody();
+
+			// textToSpeechEntityJson wraps the raw bytes in a SpeechResponse, so a
+			// missing payload can surface as a null audio array instead of a null body.
+			if (speech == null || speech.audio() == null) {
+				logger.warn("No speech response returned for speechRequest: {}", speechRequest);
+				return new SpeechResponse(convertResponse(OpenAiApi.SpeechResponse.NULL));
+			}
+
+			RateLimit rateLimits = OpenAiResponseHeaderExtractor.extractAiResponseHeaders(speechEntity);
+
+			return new SpeechResponse(convertResponse(speech), new OpenAiSpeechResponseMetadata(rateLimits));
+
+		});
+	}
+
+	/**
+	 * Builds the API request for a prompt: first instruction text, then default options,
+	 * then the prompt's runtime options.
+	 */
+	private OpenAiApi.SpeechRequest createRequest(SpeechPrompt speechPrompt) {
+		String instructions = speechPrompt.getInstructions().get(0).getText();
+
+		OpenAiApi.SpeechRequest speechRequest = new OpenAiApi.SpeechRequest(instructions);
+
+		if (this.defaultOptions != null) {
+			speechRequest = ModelOptionsUtils.merge(this.defaultOptions, speechRequest,
+					OpenAiApi.SpeechRequest.class);
+		}
+
+		if (speechPrompt.getOptions() != null) {
+			speechRequest = ModelOptionsUtils.merge(toOpenAiSpeechOptions(speechPrompt.getOptions()), speechRequest,
+					OpenAiApi.SpeechRequest.class);
+		}
+
+		return speechRequest;
+	}
+
+	/** Wraps the audio bytes of an API response in a {@link Speech}. */
+	private Speech convertResponse(OpenAiApi.SpeechResponse speechResponse) {
+		return new Speech(speechResponse.audio());
+	}
+
+	/**
+	 * Copies the non-null settings of the given options into a new
+	 * {@link OpenAiSpeechOptions}. Only the {@link SpeechOptions} getters are needed: an
+	 * {@link OpenAiSpeechOptions} argument exposes the same four properties through them.
+	 */
+	private OpenAiSpeechOptions toOpenAiSpeechOptions(SpeechOptions runtimeSpeechOptions) {
+		OpenAiSpeechOptions.Builder openAiSpeechOptionBuilder = OpenAiSpeechOptions.builder();
+		if (runtimeSpeechOptions == null) {
+			return openAiSpeechOptionBuilder.build();
+		}
+		if (runtimeSpeechOptions.getModel() != null) {
+			openAiSpeechOptionBuilder.withModel(runtimeSpeechOptions.getModel());
+		}
+		if (runtimeSpeechOptions.getResponseFormat() != null) {
+			openAiSpeechOptionBuilder.withResponseFormat(runtimeSpeechOptions.getResponseFormat());
+		}
+		if (runtimeSpeechOptions.getSpeed() != null) {
+			openAiSpeechOptionBuilder.withSpeed(runtimeSpeechOptions.getSpeed());
+		}
+		if (runtimeSpeechOptions.getVoice() != null) {
+			openAiSpeechOptionBuilder.withVoice(runtimeSpeechOptions.getVoice());
+		}
+		return openAiSpeechOptionBuilder.build();
+	}
+
+}
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2024-2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.openai;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.springframework.ai.speech.SpeechOptions;
+
+/**
+ * @author Ahmed Yousri
+ */
+@JsonInclude(Include.NON_NULL)
+public class OpenAiSpeechOptions implements SpeechOptions {
+
+	// @formatter:off
+	/**
+	 *
+	 * One of the available TTS models
+	 */
+	private @JsonProperty("model") String model;
+
+	/**
+	 * The voice to use when generating the audio.
+	 */
+	private @JsonProperty("voice") String voice;
+
+	/**
+	 * The format to audio in.
+	 */
+	private @JsonProperty("response_format") String responseFormat;
+
+	/**
+	 * The speed of the generated audi.
+	 */
+	private @JsonProperty("speed") Float speed;
+
+
+	public static Builder builder() {
+		return new Builder();
+	}
+
+	public static class Builder {
+
+		protected OpenAiSpeechOptions options;
+
+		public Builder() {
+			this.options = new OpenAiSpeechOptions();
+		}
+
+		public Builder(OpenAiSpeechOptions options) {
+			this.options = options;
+		}
+
+		public Builder withModel(String model) {
+			options.model = model;
+			return this;
+		}
+
+		public Builder withVoice(String voice) {
+			options.voice = voice;
+			return this;
+		}
+
+		public Builder withResponseFormat(String responseFormat) {
+			options.responseFormat = responseFormat;
+			return this;
+		}
+
+		public Builder withSpeed(Float speed) {
+			options.speed = speed;
+			return this;
+		}
+
+		public OpenAiSpeechOptions build() {
+			return this.options;
+		}
+
+	}
+
+	public String getModel() {
+		return model;
+	}
+
+	public void setModel(String model) {
+		this.model = model;
+	}
+
+	public String getVoice() {
+		return voice;
+	}
+
+	public void setVoice(String voice) {
+		this.voice = voice;
+	}
+
+	public String getResponseFormat() {
+		return responseFormat;
+	}
+
+	public void setResponseFormat(String responseFormat) {
+		this.responseFormat = responseFormat;
+	}
+
+	public Float getSpeed() {
+		return speed;
+	}
+
+
+	public void setSpeed(Float speed) {
+		this.speed = speed;
+	}
+
+	@Override
+	public int hashCode() {
+		final int prime = 31;
+		int result = 1;
+		result = prime * result + ((model == null) ? 0 : model.hashCode());
+		result = prime * result + ((voice == null) ? 0 : voice.hashCode());
+		result = prime * result + ((responseFormat == null) ? 0 : responseFormat.hashCode());
+		result = prime * result + ((speed == null) ? 0 : speed.hashCode());
+		return result;
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		if (this == obj)
+			return true;
+		if (obj == null)
+			return false;
+		if (getClass() != obj.getClass())
+			return false;
+		OpenAiSpeechOptions other = (OpenAiSpeechOptions) obj;
+		if (model == null) {
+			if (other.model != null)
+				return false;
+		} else if (!model.equals(other.model))
+			return false;
+		if (voice == null) {
+			if (other.voice != null)
+				return false;
+		} else if (!voice.equals(other.voice))
+			return false;
+		if (responseFormat == null) {
+			if (other.responseFormat != null)
+				return false;
+		} else if (!responseFormat.equals(other.responseFormat))
+			return false;
+		if (speed == null) {
+			if (other.speed != null)
+				return false;
+		} else if (!speed.equals(other.speed))
+			return false;
+		return true;
+	}
+
+}
@@ -389,6 +389,68 @@ public record ResponseFormat(
 		}
 	}
 
+
+
+	/**
+	 * Request body for the text-to-speech call made by {@code textToSpeechEntityJson}.
+	 *
+	 * @param prompt The text to turn into audio; serialized as the {@code input} property.
+	 * @param model The TTS model name.
+	 * @param voice The voice to use.
+	 * @param responseFormat The audio format; serialized as {@code response_format}.
+	 * @param speed The playback speed of the generated audio.
+	 */
+	@JsonInclude(JsonInclude.Include.NON_NULL)
+	public record SpeechRequest(
+			@JsonProperty("input") String prompt,
+
+			@JsonProperty("model") String model,
+			@JsonProperty("voice") String voice,
+			@JsonProperty("response_format") String responseFormat,
+			@JsonProperty("speed") Double speed) {
+
+		/** Placeholder request with no input, model or voice ("mp3" format, speed 1.0). */
+		public static final SpeechRequest NULL = new SpeechRequest();
+
+		/**
+		 * Creates a request with the "mp3" format and speed 1.0.
+		 * @param model The TTS model name.
+		 * @param input The text to turn into audio.
+		 * @param voice The voice to use.
+		 */
+		public SpeechRequest(String model, String input, String voice) {
+			// The canonical constructor takes the input text first and the model second,
+			// so the two arguments must be swapped when delegating.
+			this(input, model, voice, "mp3", 1.0);
+		}
+
+		/**
+		 * Creates a request for the given text using model "tts-1", voice "alloy", the
+		 * "mp3" format and speed 1.0.
+		 * @param input The text to turn into audio.
+		 */
+		public SpeechRequest(String input) {
+			this(input, "tts-1", "alloy", "mp3", 1.0);
+		}
+
+		/** Creates a request with no input, model or voice, "mp3" format and speed 1.0. */
+		public SpeechRequest() {
+			this(null, null, null, "mp3", 1.0);
+		}
+
+		/** Canonical constructor; assigns every component as given. */
+		public SpeechRequest(String prompt, String model, String voice, String responseFormat, Double speed) {
+			this.model = model;
+			this.prompt = prompt;
+			this.voice = voice;
+			this.responseFormat = responseFormat;
+			this.speed = speed;
+		}
+	}
+
+	/**
+	 * Audio payload produced for a {@link SpeechRequest}.
+	 *
+	 * @param audio The raw audio bytes; serialized as the {@code audio} property.
+	 */
+	@JsonInclude(Include.NON_NULL)
+	public record SpeechResponse(
+
+			@JsonProperty("audio") byte[] audio) {
+
+		/** Placeholder response carrying an empty byte array. */
+		public static final SpeechResponse NULL = new SpeechResponse(new byte[0]);
+
+	}
+
+	/**
+	 * Creates a model response for the given text-to-speech request.
+	 *
+	 * @param speechRequest The text-to-speech request.
+	 * @return Entity response with the generated speech as a body and HTTP status code and headers.
+	 */
+	public ResponseEntity<SpeechResponse> textToSpeechEntityJson(OpenAiApi.SpeechRequest speechRequest) {
+		Assert.notNull(speechRequest, "The request body cannot be null.");
+
+		var responseEntity = this.restClient.post()
+				.uri("/v1/audio/speech")
+				.body(speechRequest)
+				.accept(MediaType.APPLICATION_OCTET_STREAM)
+				.retrieve()
+				.toEntity(byte[].class);
+		HttpHeaders headers = new HttpHeaders();
+		headers.addAll(responseEntity.getHeaders());
+		SpeechResponse speechResponse = new SpeechResponse(responseEntity.getBody());
+		return new ResponseEntity<>(speechResponse, headers, responseEntity.getStatusCode());
+	}
 	/**
 	 * Message comprising the conversation.
 	 *
@@ -494,6 +556,7 @@ public enum ChatCompletionFinishReason {
 		@JsonProperty("function_call") FUNCTION_CALL
 	}
 
+
 	/**
 	 * Represents a chat completion response returned by model, based on the provided input.
 	 *