Skip to content

Commit ced9e6a

Browse files
committed
Fix and optimize azure openai chat/embedding clients
- Also restructure and clarify the chat/embedding docs.
1 parent ca0f6d7 commit ced9e6a

File tree

8 files changed

+216
-214
lines changed

8 files changed

+216
-214
lines changed

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiEmbeddingClient.java

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.springframework.ai.document.MetadataMode;
1616
import org.springframework.ai.embedding.AbstractEmbeddingClient;
1717
import org.springframework.ai.embedding.Embedding;
18+
import org.springframework.ai.embedding.EmbeddingOptions;
1819
import org.springframework.ai.embedding.EmbeddingRequest;
1920
import org.springframework.ai.embedding.EmbeddingResponse;
2021
import org.springframework.ai.embedding.EmbeddingResponseMetadata;
@@ -27,9 +28,7 @@ public class AzureOpenAiEmbeddingClient extends AbstractEmbeddingClient {
2728

2829
private final OpenAIClient azureOpenAiClient;
2930

30-
private AzureOpenAiEmbeddingOptions defaultOptions = AzureOpenAiEmbeddingOptions.builder()
31-
.withModel("text-embedding-ada-002")
32-
.build();
31+
private final AzureOpenAiEmbeddingOptions defaultOptions;
3332

3433
private final MetadataMode metadataMode;
3534

@@ -38,10 +37,18 @@ public AzureOpenAiEmbeddingClient(OpenAIClient azureOpenAiClient) {
3837
}
3938

4039
public AzureOpenAiEmbeddingClient(OpenAIClient azureOpenAiClient, MetadataMode metadataMode) {
40+
this(azureOpenAiClient, metadataMode,
41+
AzureOpenAiEmbeddingOptions.builder().withModel("text-embedding-ada-002").build());
42+
}
43+
44+
public AzureOpenAiEmbeddingClient(OpenAIClient azureOpenAiClient, MetadataMode metadataMode,
45+
AzureOpenAiEmbeddingOptions options) {
4146
Assert.notNull(azureOpenAiClient, "com.azure.ai.openai.OpenAIClient must not be null");
4247
Assert.notNull(metadataMode, "Metadata mode must not be null");
48+
Assert.notNull(options, "Options must not be null");
4349
this.azureOpenAiClient = azureOpenAiClient;
4450
this.metadataMode = metadataMode;
51+
this.defaultOptions = options;
4552
}
4653

4754
@Override
@@ -58,14 +65,7 @@ public List<Double> embed(Document document) {
5865
public EmbeddingResponse call(EmbeddingRequest embeddingRequest) {
5966
logger.debug("Retrieving embeddings");
6067

61-
EmbeddingsOptions azureOptions = new EmbeddingsOptions(embeddingRequest.getInstructions());
62-
if (this.defaultOptions != null) {
63-
azureOptions = ModelOptionsUtils.merge(azureOptions, this.defaultOptions, EmbeddingsOptions.class);
64-
}
65-
if (embeddingRequest.getOptions() != null) {
66-
azureOptions = ModelOptionsUtils.merge(embeddingRequest.getOptions(), azureOptions,
67-
EmbeddingsOptions.class);
68-
}
68+
EmbeddingsOptions azureOptions = toEmbeddingOptions(embeddingRequest);
6969
Embeddings embeddings = this.azureOpenAiClient.getEmbeddings(azureOptions.getModel(), azureOptions);
7070

7171
logger.debug("Embeddings retrieved");
@@ -78,9 +78,10 @@ public EmbeddingResponse call(EmbeddingRequest embeddingRequest) {
7878
EmbeddingsOptions toEmbeddingOptions(EmbeddingRequest embeddingRequest) {
7979
var azureOptions = new EmbeddingsOptions(embeddingRequest.getInstructions());
8080
if (this.defaultOptions != null) {
81-
azureOptions = ModelOptionsUtils.merge(azureOptions, this.defaultOptions, EmbeddingsOptions.class);
81+
azureOptions.setModel(this.defaultOptions.getModel());
82+
azureOptions.setUser(this.defaultOptions.getUser());
8283
}
83-
if (embeddingRequest.getOptions() != null) {
84+
if (embeddingRequest.getOptions() != null && !EmbeddingOptions.EMPTY.equals(embeddingRequest.getOptions())) {
8485
azureOptions = ModelOptionsUtils.merge(embeddingRequest.getOptions(), azureOptions,
8586
EmbeddingsOptions.class);
8687
}
@@ -116,14 +117,4 @@ public AzureOpenAiEmbeddingOptions getDefaultOptions() {
116117
return this.defaultOptions;
117118
}
118119

119-
public void setDefaultOptions(AzureOpenAiEmbeddingOptions defaultOptions) {
120-
Assert.notNull(defaultOptions, "Default options must not be null");
121-
this.defaultOptions = defaultOptions;
122-
}
123-
124-
public AzureOpenAiEmbeddingClient withDefaultOptions(AzureOpenAiEmbeddingOptions options) {
125-
this.defaultOptions = options;
126-
return this;
127-
}
128-
129120
}

models/spring-ai-azure-openai/src/test/java/org/springframework/ai/azure/openai/AzureEmbeddingsOptionsTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.junit.jupiter.api.Test;
2323
import org.mockito.Mockito;
2424

25+
import org.springframework.ai.document.MetadataMode;
2526
import org.springframework.ai.embedding.EmbeddingRequest;
2627

2728
import static org.assertj.core.api.Assertions.assertThat;
@@ -36,7 +37,7 @@ public class AzureEmbeddingsOptionsTests {
3637
public void createRequestWithChatOptions() {
3738

3839
OpenAIClient mockClient = Mockito.mock(OpenAIClient.class);
39-
var client = new AzureOpenAiEmbeddingClient(mockClient).withDefaultOptions(
40+
var client = new AzureOpenAiEmbeddingClient(mockClient, MetadataMode.EMBED,
4041
AzureOpenAiEmbeddingOptions.builder().withModel("DEFAULT_MODEL").withUser("USER_TEST").build());
4142

4243
var requestOptions = client.toEmbeddingOptions(new EmbeddingRequest(List.of("Test message content"), null));

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/clients/azure-openai-chat.adoc

Lines changed: 97 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -4,78 +4,22 @@ Azure's OpenAI offering, powered by ChatGPT, extends beyond traditional OpenAI c
44

55
Azure offers Java developers the opportunity to leverage AI's full potential by integrating it with an array of Azure services, which includes AI-related resources such as Vector Stores on Azure.
66

7-
== Getting Started
7+
== Pre-requisites
88

99
Obtain your Azure OpenAI `endpoint` and `api-key` from the Azure OpenAI Service section on the link:https://portal.azure.com[Azure Portal].
1010

11-
=== Configure the Azure OpenAI Chat Client Manually
12-
13-
Add the `spring-ai-azure-openai` dependency to your project's Maven `pom.xml` file:
14-
[source, xml]
15-
----
16-
<dependency>
17-
<groupId>org.springframework.ai</groupId>
18-
<artifactId>spring-ai-azure-openai</artifactId>
19-
<version>0.8.0-SNAPSHOT</version>
20-
</dependency>
21-
----
22-
23-
or to your Gradle `build.gradle` build file.
24-
25-
[source,gradle]
26-
----
27-
dependencies {
28-
implementation 'org.springframework.ai:spring-ai-azure-openai:0.8.0-SNAPSHOT'
29-
}
30-
----
31-
32-
NOTE: The `spring-ai-azure-openai` dependency also provide the access to the `AzureOpenAiChatClient`. For more information about the `AzureOpenAiChatClient` refer to the link:../clients/azure-openai-chat.html[Azure OpenAI Chat] section.
33-
34-
Next, create an `AzureOpenAiChatClient` instance and use it to generate text responses:
35-
36-
[source,java]
37-
----
38-
var openAIClient = OpenAIClientBuilder()
39-
.credential(new AzureKeyCredential(System.getenv("AZURE_OPENAI_API_KEY")))
40-
.endpoint(System.getenv("AZURE_OPENAI_ENDPOINT"))
41-
.buildClient();
42-
43-
var chatClient = new AzureOpenAiChatClient(openAIClient).withDefaultOptions(
44-
AzureOpenAiChatOptions.builder()
45-
.withModel("gpt-35-turbo")
46-
.withTemperature(0.4)
47-
.withMaxTokens(200)
48-
.build());
49-
50-
ChatResponse response = chatClient.call(
51-
new Prompt("Generate the names of 5 famous pirates."));
52-
53-
// Or with streaming responses
54-
Flux<ChatResponse> response = chatClient.stream(
55-
new Prompt("Generate the names of 5 famous pirates."));
56-
57-
----
58-
59-
NOTE: the `gpt-35-turbo` is actually the `Deployment Name` as presented in the Azure AI Portal.
60-
61-
The `AzureOpenAiChatOptions` provides the configuration information for the chat requests.
62-
The `AzureOpenAiChatOptions` offers a builder to create the options.
63-
64-
At start time use the `AzureOpenAiChatClient#withDefaultOptions()` to configure the default options used for all char requests.
65-
Furthermore, at runtime, you can override the default options by passing a `AzureOpenAiChatOptions` instance with your to the `Prompt` request.
11+
Spring AI defines a configuration property named `spring.ai.azure.openai.api-key` that you should set to the value of the `API Key` obtained from Azure.
12+
There is also a configuration property named `spring.ai.azure.openai.endpoint` that you should set to the endpoint URL obtained when provisioning your model in Azure.
6613

67-
For example to override the default model name for a specific request:
14+
Exporting environment variables is one way to set these configuration properties:
6815

69-
[source,java]
16+
[source,shell]
7017
----
71-
ChatResponse response = chatClient.call(
72-
new Prompt(
73-
"Generate the names of 5 famous pirates.",
74-
AzureOpenAiChatOptions.builder().withModel("gpt-4-32k").build()
75-
));
18+
export SPRING_AI_AZURE_OPENAI_API_KEY=<INSERT KEY HERE>
19+
export SPRING_AI_AZURE_OPENAI_ENDPOINT=<INSERT ENDPOINT URL HERE>
7620
----
7721

78-
=== Spring Boot Auto-configuration
22+
== Auto-configuration
7923

8024
Spring AI provides Spring Boot auto-configuration for the Azure OpenAI Chat Client.
8125
To enable it add the following dependency to your project's Maven `pom.xml` file:
@@ -98,20 +42,38 @@ dependencies {
9842
}
9943
----
10044

101-
Spring AI defines a configuration property named `spring.ai.azure.openai.api-key` that you should set to the value of the `API Key` obtained from Azure.
102-
There is also a configuration property named `spring.ai.azure.openai.endpoint` that you should set to the endpoint URL obtained when provisioning your model in Azure.
45+
=== Chat Properties
10346

104-
Exporting environment variables is one way to set these configuration properties:
47+
The prefix `spring.ai.azure.openai` is the property prefix to configure the connection to Azure OpenAI.
10548

106-
[source,shell]
107-
----
108-
export SPRING_AI_AZURE_OPENAI_API_KEY=<INSERT KEY HERE>
109-
export SPRING_AI_AZURE_OPENAI_ENDPOINT=<INSERT ENDPOINT URL HERE>
110-
----
49+
[cols="3,5,3"]
50+
|====
51+
| Property | Description | Default
52+
53+
| spring.ai.azure.openai.api-key | The Key from Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
54+
| spring.ai.azure.openai.endpoint | The endpoint from the Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
55+
|====
11156

112-
The `spring.ai.azure.openai.chat.options.*` properties are used to configure the default options used for all chat requests.
57+
The prefix `spring.ai.azure.openai.chat` is the property prefix that configures the `ChatClient` implementation for Azure OpenAI.
11358

114-
==== Sample Code
59+
[cols="3,5,3"]
60+
|====
61+
| Property | Description | Default
62+
63+
| spring.ai.azure.openai.chat.options.model | * The model name to provide as part of this completions request. Not applicable to Azure OpenAI, where deployment information should be included in the Azure resource URI that's connected to.
64+
| gpt-35-turbo
65+
| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate. | -
66+
| spring.ai.azure.openai.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.7
67+
| spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
68+
| spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
69+
| spring.ai.azure.openai.chat.options.user | An identifier for the caller or end user of the operation. This may be used for tracking or rate-limiting purposes. | -
70+
| spring.ai.azure.openai.chat.options.n | The number of chat completions choices that should be generated for a chat completions response. | -
71+
| spring.ai.azure.openai.chat.options.stop | A collection of textual sequences that will end completions generation. | -
72+
| spring.ai.azure.openai.chat.options.presencePenalty | A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. | -
73+
| spring.ai.azure.openai.chat.options.frequencyPenalty | A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. | -
74+
|====
75+
76+
=== Sample Code
11577

11678
This will create a `ChatClient` implementation that you can inject into your class.
11779
Here is an example of a simple `@Controller` class that uses the `ChatClient` implementation.
@@ -143,34 +105,71 @@ public class ChatController {
143105
}
144106
----
145107

146-
== Azure OpenAI Chat Properties
108+
== Manual Configuration
147109

148-
The prefix `spring.ai.azure.openai` is the property prefix to configure the connection to Azure OpenAI.
110+
Add the `spring-ai-azure-openai` dependency to your project's Maven `pom.xml` file:
111+
[source, xml]
112+
----
113+
<dependency>
114+
<groupId>org.springframework.ai</groupId>
115+
<artifactId>spring-ai-azure-openai</artifactId>
116+
<version>0.8.0-SNAPSHOT</version>
117+
</dependency>
118+
----
149119

150-
[cols="3,5,3"]
151-
|====
152-
| Property | Description | Default
120+
or to your Gradle `build.gradle` build file.
153121

154-
| spring.ai.azure.openai.api-key | The Key from Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
155-
| spring.ai.azure.openai.endpoint | The endpoint from the Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
156-
|====
122+
[source,gradle]
123+
----
124+
dependencies {
125+
implementation 'org.springframework.ai:spring-ai-azure-openai:0.8.0-SNAPSHOT'
126+
}
127+
----
157128

129+
NOTE: The `spring-ai-azure-openai` dependency also provides access to the `AzureOpenAiChatClient`. For more information about the `AzureOpenAiChatClient` refer to the link:../clients/azure-openai-chat.html[Azure OpenAI Chat] section.
158130

159-
The prefix `spring.ai.azure.openai.chat` is the property prefix that configures the `ChatClient` implementation for Azure OpenAI.
131+
Next, create an `AzureOpenAiChatClient` instance and use it to generate text responses:
160132

161-
[cols="3,5,3"]
162-
|====
163-
| Property | Description | Default
133+
[source,java]
134+
----
135+
var openAIClient = OpenAIClientBuilder()
136+
.credential(new AzureKeyCredential(System.getenv("AZURE_OPENAI_API_KEY")))
137+
.endpoint(System.getenv("AZURE_OPENAI_ENDPOINT"))
138+
.buildClient();
164139
165-
| spring.ai.azure.openai.chat.options.model | * The model name to provide as part of this completions request. Not applicable to Azure OpenAI, where deployment information should be included in the Azure resource URI that's connected to.
166-
| gpt-35-turbo
167-
| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate. | -
168-
| spring.ai.azure.openai.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.7
169-
| spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
170-
| spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
171-
| spring.ai.azure.openai.chat.options.user | An identifier for the caller or end user of the operation. This may be used for tracking or rate-limiting purposes. | -
172-
| spring.ai.azure.openai.chat.options.n | The number of chat completions choices that should be generated for a chat completions response. | -
173-
| spring.ai.azure.openai.chat.options.stop | A collection of textual sequences that will end completions generation. | -
174-
| spring.ai.azure.openai.chat.options.presencePenalty | A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. | -
175-
| spring.ai.azure.openai.chat.options.frequencyPenalty | A value that influences the probability of generated tokens appearing based on their cumulative frequency in generated text. Positive values will make tokens less likely to appear as their frequency increases and decrease the likelihood of the model repeating the same statements verbatim. | -
176-
|====
140+
var chatClient = new AzureOpenAiChatClient(openAIClient).withDefaultOptions(
141+
AzureOpenAiChatOptions.builder()
142+
.withModel("gpt-35-turbo")
143+
.withTemperature(0.4)
144+
.withMaxTokens(200)
145+
.build());
146+
147+
ChatResponse response = chatClient.call(
148+
new Prompt("Generate the names of 5 famous pirates."));
149+
150+
// Or with streaming responses
151+
Flux<ChatResponse> response = chatClient.stream(
152+
new Prompt("Generate the names of 5 famous pirates."));
153+
154+
----
155+
156+
NOTE: the `gpt-35-turbo` is actually the `Deployment Name` as presented in the Azure AI Portal.
157+
158+
=== Chat Options
159+
160+
The `AzureOpenAiChatOptions` provides the configuration information for the chat requests.
161+
The `AzureOpenAiChatOptions` offers a builder to create the options.
162+
163+
At start time use the `AzureOpenAiChatClient` constructor to set the default options used for all chat requests.
164+
At runtime, you can override the default options by passing an `AzureOpenAiChatOptions` instance with your request-specific options to the `Prompt` request.
165+
166+
For example to override the default model name for a specific request:
167+
168+
[source,java]
169+
----
170+
ChatResponse response = chatClient.call(
171+
new Prompt(
172+
"Generate the names of 5 famous pirates.",
173+
AzureOpenAiChatOptions.builder().withModel("gpt-4-32k").build()
174+
));
175+
----

0 commit comments

Comments
 (0)