Skip to content

Commit 14e7033

Browse files
dev-jonghoonpark authored and tzolov committed
refactor(openai): change voice parameter to string in OpenAI Audio Speech API (#2395)
This change modifies the voice parameter in the OpenAI Audio Speech API from using the Voice enum directly to using the string value of the enum. This provides more flexibility for handling voice options, especially for custom voices or when voice names come from configuration.

- Change voice parameter type from Voice enum to String
- Add overloaded methods to accept both enum and string values
- Update tests and documentation to reflect these changes

Signed-off-by: jonghoon park <dev@jonghoonpark.com>
1 parent 3fcb10a commit 14e7033

File tree

10 files changed

+74
-43
lines changed

10 files changed

+74
-43
lines changed

auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiAudioSpeechProperties.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
*
3030
* @author Ahmed Yousri
3131
* @author Stefan Vassilev
32+
* @author Jonghoon Park
3233
*/
3334
@ConfigurationProperties(OpenAiAudioSpeechProperties.CONFIG_PREFIX)
3435
public class OpenAiAudioSpeechProperties extends OpenAiParentProperties {
@@ -39,7 +40,7 @@ public class OpenAiAudioSpeechProperties extends OpenAiParentProperties {
3940

4041
private static final Float SPEED = 1.0f;
4142

42-
private static final OpenAiAudioApi.SpeechRequest.Voice VOICE = OpenAiAudioApi.SpeechRequest.Voice.ALLOY;
43+
private static final String VOICE = OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue();
4344

4445
private static final OpenAiAudioApi.SpeechRequest.AudioResponseFormat DEFAULT_RESPONSE_FORMAT = OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3;
4546

auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -40,6 +40,7 @@
4040
*
4141
* @author Christian Tzolov
4242
* @author Thomas Vitale
43+
* @author Jonghoon Park
4344
* @since 0.8.0
4445
*/
4546
public class OpenAiPropertiesTests {
@@ -177,7 +178,7 @@ public void speechProperties() {
177178

178179
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_1");
179180
assertThat(speechProperties.getOptions().getVoice())
180-
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY);
181+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue());
181182
assertThat(speechProperties.getOptions().getResponseFormat())
182183
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3);
183184
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.75f);
@@ -205,7 +206,7 @@ public void speechPropertiesTest() {
205206

206207
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_1");
207208
assertThat(speechProperties.getOptions().getVoice())
208-
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY);
209+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue());
209210
assertThat(speechProperties.getOptions().getResponseFormat())
210211
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3);
211212
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.75f);
@@ -237,7 +238,8 @@ public void speechOverrideConnectionPropertiesTest() {
237238
assertThat(speechProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL2");
238239

239240
assertThat(speechProperties.getOptions().getModel()).isEqualTo("TTS_2");
240-
assertThat(speechProperties.getOptions().getVoice()).isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ECHO);
241+
assertThat(speechProperties.getOptions().getVoice())
242+
.isEqualTo(OpenAiAudioApi.SpeechRequest.Voice.ECHO.getValue());
241243
assertThat(speechProperties.getOptions().getResponseFormat())
242244
.isEqualTo(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.OPUS);
243245
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(0.5f);

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechModel.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@
4242
* @author Ahmed Yousri
4343
* @author Hyunjoon Choi
4444
* @author Thomas Vitale
45+
* @author Jonghoon Park
4546
* @see OpenAiAudioApi
4647
* @since 1.0.0-M1
4748
*/
@@ -81,7 +82,7 @@ public OpenAiAudioSpeechModel(OpenAiAudioApi audioApi) {
8182
OpenAiAudioSpeechOptions.builder()
8283
.model(OpenAiAudioApi.TtsModel.TTS_1.getValue())
8384
.responseFormat(AudioResponseFormat.MP3)
84-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
85+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
8586
.speed(SPEED)
8687
.build());
8788
}

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioSpeechOptions.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -29,14 +29,15 @@
2929
* @author Ahmed Yousri
3030
* @author Hyunjoon Choi
3131
* @author Ilayaperumal Gopinathan
32+
* @author Jonghoon Park
3233
* @since 1.0.0-M1
3334
*/
3435
@JsonInclude(JsonInclude.Include.NON_NULL)
3536
public class OpenAiAudioSpeechOptions implements ModelOptions {
3637

3738
/**
38-
* ID of the model to use for generating the audio. One of the available TTS models:
39-
* tts-1 or tts-1-hd.
39+
* ID of the model to use for generating the audio. For OpenAI's TTS API, use one of
40+
* the available models: tts-1 or tts-1-hd.
4041
*/
4142
@JsonProperty("model")
4243
private String model;
@@ -48,11 +49,11 @@ public class OpenAiAudioSpeechOptions implements ModelOptions {
4849
private String input;
4950

5051
/**
51-
* The voice to use for synthesis. One of the available voices for the chosen model:
52-
* 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
52+
* The voice to use for synthesis. For OpenAI's TTS API, one of the available voices
53+
* for the chosen model: 'alloy', 'echo', 'fable', 'onyx', 'nova', and 'shimmer'.
5354
*/
5455
@JsonProperty("voice")
55-
private Voice voice;
56+
private String voice;
5657

5758
/**
5859
* The format of the audio output. Supported formats are mp3, opus, aac, and flac.
@@ -88,14 +89,18 @@ public void setInput(String input) {
8889
this.input = input;
8990
}
9091

91-
public Voice getVoice() {
92+
public String getVoice() {
9293
return this.voice;
9394
}
9495

95-
public void setVoice(Voice voice) {
96+
public void setVoice(String voice) {
9697
this.voice = voice;
9798
}
9899

100+
public void setVoice(Voice voice) {
101+
this.voice = voice.getValue();
102+
}
103+
99104
public AudioResponseFormat getResponseFormat() {
100105
return this.responseFormat;
101106
}
@@ -197,11 +202,16 @@ public Builder input(String input) {
197202
return this;
198203
}
199204

200-
public Builder voice(Voice voice) {
205+
public Builder voice(String voice) {
201206
this.options.voice = voice;
202207
return this;
203208
}
204209

210+
public Builder voice(Voice voice) {
211+
this.options.voice = voice.getValue();
212+
return this;
213+
}
214+
205215
public Builder responseFormat(AudioResponseFormat responseFormat) {
206216
this.options.responseFormat = responseFormat;
207217
return this;

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiAudioApi.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -47,6 +47,7 @@
4747
*
4848
* @author Christian Tzolov
4949
* @author Ilayaperumal Gopinathan
50+
* @author Jonghoon Park
5051
* @since 0.8.1
5152
*/
5253
public class OpenAiAudioApi {
@@ -330,7 +331,7 @@ public record SpeechRequest(
330331
// @formatter:off
331332
@JsonProperty("model") String model,
332333
@JsonProperty("input") String input,
333-
@JsonProperty("voice") Voice voice,
334+
@JsonProperty("voice") String voice,
334335
@JsonProperty("response_format") AudioResponseFormat responseFormat,
335336
@JsonProperty("speed") Float speed) {
336337
// @formatter:on
@@ -419,7 +420,7 @@ public static class Builder {
419420

420421
private String input;
421422

422-
private Voice voice;
423+
private String voice;
423424

424425
private AudioResponseFormat responseFormat = AudioResponseFormat.MP3;
425426

@@ -435,11 +436,16 @@ public Builder input(String input) {
435436
return this;
436437
}
437438

438-
public Builder voice(Voice voice) {
439+
public Builder voice(String voice) {
439440
this.voice = voice;
440441
return this;
441442
}
442443

444+
public Builder voice(Voice voice) {
445+
this.voice = voice.getValue();
446+
return this;
447+
}
448+
443449
public Builder responseFormat(AudioResponseFormat responseFormat) {
444450
this.responseFormat = responseFormat;
445451
return this;

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioApiIT.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -37,6 +37,7 @@
3737

3838
/**
3939
* @author Christian Tzolov
40+
* @author Jonghoon Park
4041
*/
4142
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
4243
public class OpenAiAudioApiIT {
@@ -53,7 +54,7 @@ void speechTranscriptionAndTranslation() throws IOException {
5354
.createSpeech(SpeechRequest.builder()
5455
.model(TtsModel.TTS_1_HD.getValue())
5556
.input("Hello, my name is Chris and I love Spring A.I.")
56-
.voice(Voice.ONYX)
57+
.voice(Voice.ONYX.getValue())
5758
.build())
5859
.getBody();
5960

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/api/OpenAiAudioModelNoOpApiKeysIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
/**
3333
* @author Ilayaperumal Gopinathan
34+
* @author Jonghoon Park
3435
*/
3536
@SpringBootTest(classes = OpenAiAudioModelNoOpApiKeysIT.Config.class)
3637
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
@@ -46,7 +47,7 @@ void checkNoOpKey() {
4647
.createSpeech(OpenAiAudioApi.SpeechRequest.builder()
4748
.model(OpenAiAudioApi.TtsModel.TTS_1_HD.getValue())
4849
.input("Hello, my name is Chris and I love Spring A.I.")
49-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ONYX)
50+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ONYX.getValue())
5051
.build())
5152
.getBody();
5253
}).isInstanceOf(NonTransientAiException.class);

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelIT.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -33,6 +33,10 @@
3333

3434
import static org.assertj.core.api.Assertions.assertThat;
3535

36+
/**
37+
* @author Ahmed Yousri
38+
* @author Jonghoon Park
39+
*/
3640
@SpringBootTest(classes = OpenAiTestConfiguration.class)
3741
@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+")
3842
class OpenAiSpeechModelIT extends AbstractIT {
@@ -57,7 +61,7 @@ void shouldProduceAudioBytesDirectlyFromMessage() {
5761
@Test
5862
void shouldGenerateNonEmptyMp3AudioFromSpeechPrompt() {
5963
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
60-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
64+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
6165
.speed(SPEED)
6266
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
6367
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -93,7 +97,7 @@ void shouldGenerateNonEmptyWavAudioFromSpeechPrompt() {
9397
@Test
9498
void speechRateLimitTest() {
9599
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
96-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
100+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
97101
.speed(SPEED)
98102
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
99103
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -113,7 +117,7 @@ void speechRateLimitTest() {
113117
void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
114118

115119
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
116-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
120+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
117121
.speed(SPEED)
118122
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
119123
.model(OpenAiAudioApi.TtsModel.TTS_1.value)
@@ -135,7 +139,7 @@ void shouldStreamNonEmptyResponsesForValidSpeechPrompts() {
135139
@ValueSource(strings = { "alloy", "echo", "fable", "onyx", "nova", "shimmer", "sage", "coral", "ash" })
136140
void speechVoicesTest(String voice) {
137141
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
138-
.voice(OpenAiAudioApi.SpeechRequest.Voice.valueOf(voice.toUpperCase()))
142+
.voice(voice)
139143
.speed(SPEED)
140144
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
141145
.model(OpenAiAudioApi.TtsModel.TTS_1.value)

models/spring-ai-openai/src/test/java/org/springframework/ai/openai/audio/speech/OpenAiSpeechModelWithSpeechResponseMetadataTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
/**
4848
* @author Ahmed Yousri
49+
* @author Jonghoon Park
4950
*/
5051
@RestClientTest(OpenAiSpeechModelWithSpeechResponseMetadataTests.Config.class)
5152
public class OpenAiSpeechModelWithSpeechResponseMetadataTests {
@@ -71,7 +72,7 @@ void aiResponseContainsImageResponseMetadata() {
7172
prepareMock();
7273

7374
OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
74-
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY)
75+
.voice(OpenAiAudioApi.SpeechRequest.Voice.ALLOY.getValue())
7576
.speed(SPEED)
7677
.responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat.MP3)
7778
.model(OpenAiAudioApi.TtsModel.TTS_1.value)

0 commit comments

Comments
 (0)