
Commit f91ccf0

ThomasVitale authored and tzolov committed
Ollama: Update APIs, Testcontainers, Documentation
Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>
1 parent 70c8c5a commit f91ccf0

File tree

13 files changed: +75 −84 lines changed

models/spring-ai-ollama/pom.xml

Lines changed: 6 additions & 0 deletions
@@ -73,5 +73,11 @@
         <artifactId>junit-jupiter</artifactId>
         <scope>test</scope>
     </dependency>
+
+    <dependency>
+        <groupId>org.testcontainers</groupId>
+        <artifactId>ollama</artifactId>
+        <scope>test</scope>
+    </dependency>
 </dependencies>
</project>

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java

Lines changed: 5 additions & 2 deletions
@@ -139,6 +139,8 @@ public OllamaApi(String baseUrl, RestClient.Builder restClientBuilder) {
      * context will be returned. You may choose to use the raw parameter if you are
      * specifying a full templated prompt in your request to the API, and are managing
      * history yourself.
+     * @param images (optional) a list of base64-encoded images (for multimodal models such as llava).
+     * @param keepAlive (optional) controls how long the model will stay loaded into memory following the request (default: 5m).
      */
     @JsonInclude(Include.NON_NULL)
     public record GenerateRequest(

@@ -503,9 +505,9 @@ public ChatRequest build() {
      * @param evalCount number of tokens in the response.
      * @param evalDuration time spent generating the response.
      * @see <a href=
-     * "https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
+     * "https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion">Chat
      * Completion API</a>
-     * @see <a href="https://github.com/jmorganca/ollama/blob/main/api/types.go">Ollama
+     * @see <a href="https://github.com/ollama/ollama/blob/main/api/types.go">Ollama
      * Types</a>
      */
     @JsonInclude(Include.NON_NULL)

@@ -573,6 +575,7 @@ public Flux<ChatResponse> streamingChat(ChatRequest chatRequest) {
      *
      * @param model The name of model to generate embeddings from.
      * @param prompt The text to generate embeddings for.
+     * @param keepAlive Controls how long the model will stay loaded into memory following the request (default: 5m).
      * @param options Additional model parameters listed in the documentation for the
      * Modelfile such as temperature.
      */
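For context, the two new `GenerateRequest` fields map onto the `images` and `keep_alive` fields of Ollama's `/api/generate` endpoint. A minimal sketch against the raw HTTP API, using Spring's RestClient (which OllamaApi itself builds on, per the constructor in the hunk header above); the model name, image path, and endpoint are placeholder assumptions, not part of this commit:

    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.Base64;
    import java.util.List;
    import java.util.Map;

    import org.springframework.http.MediaType;
    import org.springframework.web.client.RestClient;

    // Exercise the two newly documented fields against a local Ollama instance.
    String imageB64 = Base64.getEncoder()
            .encodeToString(Files.readAllBytes(Path.of("test.png"))); // placeholder image

    Map<String, Object> request = Map.of(
            "model", "llava",                   // a multimodal model, as the new Javadoc suggests
            "prompt", "What is in this picture?",
            "images", List.of(imageB64),        // list of base64-encoded images
            "keep_alive", "10m",                // keep the model loaded for 10 minutes
            "stream", false);

    String response = RestClient.create("http://localhost:11434")
            .post()
            .uri("/api/generate")
            .contentType(MediaType.APPLICATION_JSON)
            .body(request)
            .retrieve()
            .body(String.class);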

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java

Lines changed: 10 additions & 0 deletions
@@ -28,6 +28,11 @@ public enum OllamaModel {
      */
     LLAMA2("llama2"),

+    /**
+     * Llama 3 is a collection of language models ranging from 8B to 70B parameters.
+     */
+    LLAMA3("llama3"),
+
     /**
      * The 7B parameters model
      */

@@ -43,6 +48,11 @@ public enum OllamaModel {
      */
     PHI("phi"),

+    /**
+     * The Phi-3 3.8B language model
+     */
+    PHI3("phi3"),
+
     /**
      * A fine-tuned Mistral model
      */

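The new constants can be used wherever a model name string is expected. A hedged sketch, assuming the `OllamaOptions.create()`/`withModel(String)` builders from this codebase and that `OllamaModel` exposes its wire name via `id()`:

    import org.springframework.ai.ollama.api.OllamaModel;
    import org.springframework.ai.ollama.api.OllamaOptions;

    // Select one of the newly added models by its identifier ("llama3", "phi3").
    OllamaOptions llama3Options = OllamaOptions.create().withModel(OllamaModel.LLAMA3.id());
    OllamaOptions phi3Options = OllamaOptions.create().withModel(OllamaModel.PHI3.id());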
models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 14 additions & 47 deletions
@@ -35,10 +35,9 @@
  * @author Christian Tzolov
  * @since 0.8.0
  * @see <a href=
- * "https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
+ * "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
  * Valid Parameters and Values</a>
- * @see <a href="https://github.com/jmorganca/ollama/blob/main/api/types.go">Ollama
- * Types</a>
+ * @see <a href="https://github.com/ollama/ollama/blob/main/api/types.go">Ollama Types</a>
  */
 @JsonInclude(Include.NON_NULL)
 public class OllamaOptions implements ChatOptions, EmbeddingOptions {

@@ -47,6 +46,8 @@ public class OllamaOptions implements ChatOptions, EmbeddingOptions {

     private static final List<String> NON_SUPPORTED_FIELDS = List.of("model", "format", "keep_alive");

+    // Following fields are options which must be set when the model is loaded into memory.
+
     // @formatter:off
     /**
      * useNUMA Whether to use NUMA.

@@ -110,16 +111,6 @@
      */
     @JsonProperty("use_mlock") private Boolean useMLock;

-    /**
-     * ???
-     */
-    @JsonProperty("rope_frequency_base") private Float ropeFrequencyBase;
-
-    /**
-     * ???
-     */
-    @JsonProperty("rope_frequency_scale") private Float ropeFrequencyScale;
-
     /**
      * Sets the number of threads to use during computation. By default,
      * Ollama will detect this for optimal performance. It is recommended to set this

@@ -128,6 +119,8 @@
      */
     @JsonProperty("num_thread") private Integer numThread;

+    // Following fields are predict options used at runtime.
+
     /**
      * ???
      */

@@ -156,8 +149,8 @@
     /**
      * Works together with top-k. A higher value (e.g., 0.95) will lead to
      * more diverse text, while a lower value (e.g., 0.5) will generate more focused and
-     * conservative text. (Default: 0.9)
-     */
+     * conservative text. (Default: 0.9)
+     */
     @JsonProperty("top_p") private Float topP;

@@ -208,16 +201,15 @@
     @JsonProperty("mirostat") private Integer mirostat;

     /**
-     * Influences how quickly the algorithm responds to feedback from
-     * the generated text. A lower learning rate will result in slower adjustments, while
-     * a higher learning rate will make the algorithm more responsive. (Default: 0.1).
+     * Controls the balance between coherence and diversity of the output.
+     * A lower value will result in more focused and coherent text. (Default: 5.0)
      */
     @JsonProperty("mirostat_tau") private Float mirostatTau;

     /**
-     * Controls the balance between coherence and diversity of the
-     * output. A lower value will result in more focused and coherent text. (Default:
-     * 5.0).
+     * Influences how quickly the algorithm responds to feedback from the generated text.
+     * A lower learning rate will result in slower adjustments, while a higher learning rate
+     * will make the algorithm more responsive. (Default: 0.1)
      */
     @JsonProperty("mirostat_eta") private Float mirostatEta;

@@ -235,6 +227,7 @@ public class OllamaOptions implements ChatOptions, EmbeddingOptions {


     // Following fields are not part of the Ollama Options API but part of the Request.
+
     /**
      * NOTE: Synthetic field not part of the official Ollama API.
      * Used to allow overriding the model name with prompt options.

@@ -341,16 +334,6 @@ public OllamaOptions withUseMLock(Boolean useMLock) {
         return this;
     }

-    public OllamaOptions withRopeFrequencyBase(Float ropeFrequencyBase) {
-        this.ropeFrequencyBase = ropeFrequencyBase;
-        return this;
-    }
-
-    public OllamaOptions withRopeFrequencyScale(Float ropeFrequencyScale) {
-        this.ropeFrequencyScale = ropeFrequencyScale;
-        return this;
-    }
-
     public OllamaOptions withNumThread(Integer numThread) {
         this.numThread = numThread;
         return this;

@@ -553,22 +536,6 @@ public void setUseMLock(Boolean useMLock) {
         this.useMLock = useMLock;
     }

-    public Float getRopeFrequencyBase() {
-        return this.ropeFrequencyBase;
-    }
-
-    public void setRopeFrequencyBase(Float ropeFrequencyBase) {
-        this.ropeFrequencyBase = ropeFrequencyBase;
-    }
-
-    public Float getRopeFrequencyScale() {
-        return this.ropeFrequencyScale;
-    }
-
-    public void setRopeFrequencyScale(Float ropeFrequencyScale) {
-        this.ropeFrequencyScale = ropeFrequencyScale;
-    }
-
     public Integer getNumThread() {
         return this.numThread;
     }
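The swapped mirostat Javadoc above matches upstream Ollama: `mirostat_tau` balances coherence versus diversity, while `mirostat_eta` is the learning rate. A small sketch of the runtime ("predict") options, assuming the existing `withMirostat*` builder methods and using the Ollama defaults:

    // Values shown are the Ollama defaults for Mirostat sampling.
    OllamaOptions options = OllamaOptions.create()
            .withMirostat(1)         // 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
            .withMirostatTau(5.0f)   // coherence vs. diversity of the output
            .withMirostatEta(0.1f);  // how quickly the algorithm reacts to feedback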

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatClientIT.java

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,6 @@
 import org.springframework.ai.chat.metadata.Usage;
 import org.springframework.ai.chat.prompt.ChatOptionsBuilder;
 import org.springframework.ai.chat.messages.AssistantMessage;
-import org.testcontainers.containers.GenericContainer;
 import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;

@@ -50,6 +49,7 @@
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Bean;
 import org.springframework.core.convert.support.DefaultConversionService;
+import org.testcontainers.ollama.OllamaContainer;

 import static org.assertj.core.api.Assertions.assertThat;

@@ -63,7 +63,7 @@ class OllamaChatClientIT {
     private static final Log logger = LogFactory.getLog(OllamaChatClientIT.class);

     @Container
-    static GenericContainer<?> ollamaContainer = new GenericContainer<>("ollama/ollama:0.1.29").withExposedPorts(11434);
+    static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.1.32");

     static String baseUrl;
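The switch from `GenericContainer` to the dedicated Testcontainers Ollama module (added in the pom.xml change above) removes the hand-wired port mapping, since the module already knows the image's exposed port (11434). A sketch of the resulting scaffolding, assuming `OllamaContainer.getEndpoint()` returns the mapped `http://host:port` base URL:

    import org.testcontainers.junit.jupiter.Container;
    import org.testcontainers.junit.jupiter.Testcontainers;
    import org.testcontainers.ollama.OllamaContainer;

    @Testcontainers
    class OllamaContainerSketch {

        // No withExposedPorts(11434) needed: the module handles it.
        @Container
        static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.1.32");

        static String baseUrl;

        static void resolveBaseUrl() {
            baseUrl = ollamaContainer.getEndpoint(); // e.g. http://localhost:32768
        }
    }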
models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatClientMultimodalIT.java

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,6 @@
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
-import org.testcontainers.containers.GenericContainer;
 import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;

@@ -39,6 +38,7 @@
 import org.springframework.context.annotation.Bean;
 import org.springframework.core.io.ClassPathResource;
 import org.springframework.util.MimeTypeUtils;
+import org.testcontainers.ollama.OllamaContainer;

 import static org.assertj.core.api.Assertions.assertThat;

@@ -52,7 +52,7 @@ class OllamaChatClientMultimodalIT {
     private static final Log logger = LogFactory.getLog(OllamaChatClientIT.class);

     @Container
-    static GenericContainer<?> ollamaContainer = new GenericContainer<>("ollama/ollama:0.1.29").withExposedPorts(11434);
+    static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.1.32");

     static String baseUrl;

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaEmbeddingClientIT.java

Lines changed: 2 additions & 1 deletion
@@ -34,6 +34,7 @@
 import org.springframework.boot.SpringBootConfiguration;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.context.annotation.Bean;
+import org.testcontainers.ollama.OllamaContainer;

 import static org.assertj.core.api.Assertions.assertThat;

@@ -45,7 +46,7 @@ class OllamaEmbeddingClientIT {
     private static final Log logger = LogFactory.getLog(OllamaApiIT.class);

     @Container
-    static GenericContainer<?> ollamaContainer = new GenericContainer<>("ollama/ollama:0.1.29").withExposedPorts(11434);
+    static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.1.32");

     static String baseUrl;

models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/api/OllamaApiIT.java

Lines changed: 2 additions & 2 deletions
@@ -24,9 +24,9 @@
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
-import org.testcontainers.containers.GenericContainer;
 import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.ollama.OllamaContainer;
 import reactor.core.publisher.Flux;

 import org.springframework.ai.ollama.api.OllamaApi.ChatRequest;

@@ -50,7 +50,7 @@ public class OllamaApiIT {
     private static final Log logger = LogFactory.getLog(OllamaApiIT.class);

     @Container
-    static GenericContainer<?> ollamaContainer = new GenericContainer<>("ollama/ollama:0.1.29").withExposedPorts(11434);
+    static OllamaContainer ollamaContainer = new OllamaContainer("ollama/ollama:0.1.32");

     static OllamaApi ollamaApi;
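Wiring the low-level client to the container then becomes a one-liner. A hedged sketch of the test setup, assuming a single-argument `OllamaApi(String baseUrl)` constructor alongside the two-argument one shown in the OllamaApi.java hunk above, and the same `getEndpoint()` behavior as in the other tests:

    // Typically done in a @BeforeAll method of the test class.
    static void setUp() {
        ollamaApi = new OllamaApi(ollamaContainer.getEndpoint());
    }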

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc

Lines changed: 11 additions & 15 deletions
@@ -6,9 +6,9 @@ Spring AI supports the Ollama text generation with `OllamaChatClient`.
 == Prerequisites

 You first need to run Ollama on your local machine.
-Refer to the official Ollama project link:https://github.com/jmorganca/ollama[README] to get started running models on your local machine.
+Refer to the official Ollama project link:https://github.com/ollama/ollama[README] to get started running models on your local machine.

-NOTE: installing `ollama run llama2` will download a 4GB docker image.
+NOTE: installing `ollama run llama3` will download a 4.7GB model artifact.

 === Add Repositories and BOM

@@ -53,7 +53,7 @@ The prefix `spring.ai.ollama` is the property prefix to configure the connection
 |====

 The prefix `spring.ai.ollama.chat.options` is the property prefix that configures the Ollama chat client.
-It includes the Ollama request (advanced) parameters such as the `model`, `keep-alive`, `format` and `template` as well as the Ollama model `options` properties.
+It includes the Ollama request (advanced) parameters such as the `model`, `keep-alive`, and `format` as well as the Ollama model `options` properties.

 Here are the advanced request parameters for the Ollama chat client:

@@ -62,12 +62,12 @@ Here are the advanced request parameters for the Ollama chat client:
 | Property | Description | Default

 | spring.ai.ollama.chat.enabled | Enable Ollama chat client. | true
-| spring.ai.ollama.chat.options.model | The name of the https://github.com/ollama/ollama?tab=readme-ov-file#model-library[supported models] to use. | mistral
+| spring.ai.ollama.chat.options.model | The name of the https://github.com/ollama/ollama?tab=readme-ov-file#model-library[supported model] to use. | mistral
 | spring.ai.ollama.chat.options.format | The format to return a response in. Currently the only accepted value is `json` | -
 | spring.ai.ollama.chat.options.keep_alive | Controls how long the model will stay loaded into memory following the request | 5m
 |====

-The `options` properties are based on the link:https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values[Ollama Valid Parameters and Values] and link:https://github.com/jmorganca/ollama/blob/main/api/types.go[Ollama Types]. The default values are based on: link:https://github.com/ollama/ollama/blob/b538dc3858014f94b099730a592751a5454cab0a/api/types.go#L364[Ollama type defaults].
+The remaining `options` properties are based on the link:https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values[Ollama Valid Parameters and Values] and link:https://github.com/ollama/ollama/blob/main/api/types.go[Ollama Types]. The default values are based on the link:https://github.com/ollama/ollama/blob/b538dc3858014f94b099730a592751a5454cab0a/api/types.go#L364[Ollama type defaults].

 [cols="3,6,1"]
 |====

@@ -84,8 +84,6 @@ The `options` properties are based on the link:https://github.com/jmorganca/olla
 | spring.ai.ollama.chat.options.vocab-only | ??? | -
 | spring.ai.ollama.chat.options.use-mmap | ??? | true
 | spring.ai.ollama.chat.options.use-mlock | ??? | false
-| spring.ai.ollama.chat.options.rope-frequency-base | ??? | 10000.0
-| spring.ai.ollama.chat.options.rope-frequency-scale | ??? | 1.0
 | spring.ai.ollama.chat.options.num-thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). 0 = let the runtime decide | 0
 | spring.ai.ollama.chat.options.num-keep | ??? | 0
 | spring.ai.ollama.chat.options.seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. | -1

@@ -100,14 +98,12 @@
 | spring.ai.ollama.chat.options.presence-penalty | ??? | 0.0
 | spring.ai.ollama.chat.options.frequency-penalty | ??? | 0.0
 | spring.ai.ollama.chat.options.mirostat | Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) | 0
-| spring.ai.ollama.chat.options.mirostat-tau | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. | 5.0
-| spring.ai.ollama.chat.options.mirostat-eta | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. | 0.1
+| spring.ai.ollama.chat.options.mirostat-tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. | 5.0
+| spring.ai.ollama.chat.options.mirostat-eta | Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. | 0.1
 | spring.ai.ollama.chat.options.penalize-newline | ??? | true
 | spring.ai.ollama.chat.options.stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile. | -
 |====

-NOTE: The list of options for chat is to be reviewed. This https://github.com/spring-projects/spring-ai/issues/230[issue] will track progress.
-
 TIP: All properties prefixed with `spring.ai.ollama.chat.options` can be overridden at runtime by adding a request specific <<chat-options>> to the `Prompt` call.

 == Runtime Options [[chat-options]]

@@ -270,13 +266,13 @@ The `OllamaOptions` provides the configuration information for all chat requests

 == Low-level OllamaApi Client [[low-level-api]]

-The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java[OllamaApi] provides is lightweight Java client for Ollama Chat API link:https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion[Ollama Chat Completion API].
+The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaApi.java[OllamaApi] provides a lightweight Java client for the link:https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion[Ollama Chat Completion API].

-Following class diagram illustrates the `OllamaApi` chat interfaces and building blocks:
+The following class diagram illustrates the `OllamaApi` chat interfaces and building blocks:

 image::ollama-chat-completion-api.jpg[OllamaApi Chat Completion API Diagram, 800, 600]

-Here is a simple snippet how to use the api programmatically:
+Here is a simple snippet showing how to use the API programmatically:

 [source,java]
 ----

@@ -288,7 +284,7 @@ var request = ChatRequest.builder("orca-mini")
     .withStream(false) // not streaming
     .withMessages(List.of(
         Message.builder(Role.SYSTEM)
-            .withContent("You are geography teacher. You are talking to a student.")
+            .withContent("You are a geography teacher. You are talking to a student.")
             .build(),
         Message.builder(Role.USER)
             .withContent("What is the capital of Bulgaria and what is the size? "

0 commit comments
