Skip to content

Commit 923e09a

Browse files
GOODBOY008 authored and ilayaperumalg committed
feat(ollama): Add min_p parameter for improved sampling control
- Add min_p option
- Add qwq model

Signed-off-by: gongzhongqiang <gongzhongqiang@apache.org>
1 parent 9a428f4 commit 923e09a

File tree

4 files changed

+35
-3
lines changed

4 files changed

+35
-3
lines changed

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaModel.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ public enum OllamaModel implements ChatModelDescription {
3232
*/
3333
QWEN_2_5_7B("qwen2.5"),
3434

35+
/**
36+
* QwQ is the reasoning model of the Qwen series.
37+
*/
38+
QWQ("qwq"),
39+
3540
/**
3641
* Llama 2 is a collection of language models ranging from 7B to 70B parameters.
3742
*/

models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/api/OllamaOptions.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,16 @@ public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions {
191191
@JsonProperty("top_p")
192192
private Double topP;
193193

194+
/**
195+
* Alternative to the top_p, and aims to ensure a balance of quality and variety.
196+
* The parameter p represents the minimum probability for a token to be considered,
197+
* relative to the probability of the most likely token. For example, with p=0.05 and
198+
* the most likely token having a probability of 0.9, logits with a value
199+
* less than 0.045 are filtered out. (Default: 0.0)
200+
*/
201+
@JsonProperty("min_p")
202+
private Double minP;
203+
194204
/**
195205
* Tail free sampling is used to reduce the impact of less probable tokens
196206
* from the output. A higher value (e.g., 2.0) will reduce the impact more, while a
@@ -372,6 +382,7 @@ public static OllamaOptions fromOptions(OllamaOptions fromOptions) {
372382
.numPredict(fromOptions.getNumPredict())
373383
.topK(fromOptions.getTopK())
374384
.topP(fromOptions.getTopP())
385+
.minP(fromOptions.getMinP())
375386
.tfsZ(fromOptions.getTfsZ())
376387
.typicalP(fromOptions.getTypicalP())
377388
.repeatLastN(fromOptions.getRepeatLastN())
@@ -567,6 +578,14 @@ public void setTopP(Double topP) {
567578
this.topP = topP;
568579
}
569580

581+
public Double getMinP() {
582+
return this.minP;
583+
}
584+
585+
public void setMinP(Double minP) {
586+
this.minP = minP;
587+
}
588+
570589
public Float getTfsZ() {
571590
return this.tfsZ;
572591
}
@@ -819,8 +838,9 @@ public boolean equals(Object o) {
819838
&& Objects.equals(this.useMLock, that.useMLock) && Objects.equals(this.numThread, that.numThread)
820839
&& Objects.equals(this.numKeep, that.numKeep) && Objects.equals(this.seed, that.seed)
821840
&& Objects.equals(this.numPredict, that.numPredict) && Objects.equals(this.topK, that.topK)
822-
&& Objects.equals(this.topP, that.topP) && Objects.equals(this.tfsZ, that.tfsZ)
823-
&& Objects.equals(this.typicalP, that.typicalP) && Objects.equals(this.repeatLastN, that.repeatLastN)
841+
&& Objects.equals(this.topP, that.topP) && Objects.equals(this.minP, that.minP)
842+
&& Objects.equals(this.tfsZ, that.tfsZ) && Objects.equals(this.typicalP, that.typicalP)
843+
&& Objects.equals(this.repeatLastN, that.repeatLastN)
824844
&& Objects.equals(this.temperature, that.temperature)
825845
&& Objects.equals(this.repeatPenalty, that.repeatPenalty)
826846
&& Objects.equals(this.presencePenalty, that.presencePenalty)
@@ -838,7 +858,7 @@ public int hashCode() {
838858
return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.useNUMA, this.numCtx,
839859
this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll, this.vocabOnly,
840860
this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict, this.topK,
841-
this.topP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty,
861+
this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature, this.repeatPenalty,
842862
this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau, this.mirostatEta,
843863
this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled,
844864
this.toolContext);
@@ -958,6 +978,11 @@ public Builder topP(Double topP) {
958978
return this;
959979
}
960980

981+
public Builder minP(Double minP) {
982+
this.options.minP = minP;
983+
return this;
984+
}
985+
961986
public Builder tfsZ(Float tfsZ) {
962987
this.options.tfsZ = tfsZ;
963988
return this;

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/ollama-chat.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ The remaining `options` properties are based on the link:https://github.com/olla
137137
| spring.ai.ollama.chat.options.num-predict | Maximum number of tokens to predict when generating text. (-1 = infinite generation, -2 = fill context) | -1
138138
| spring.ai.ollama.chat.options.top-k | Reduces the probability of generating nonsense. A higher value (e.g., 100) will give more diverse answers, while a lower value (e.g., 10) will be more conservative. | 40
139139
| spring.ai.ollama.chat.options.top-p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. | 0.9
140+
| spring.ai.ollama.chat.options.min-p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. | 0.0
140141
| spring.ai.ollama.chat.options.tfs-z | Tail-free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. | 1.0
141142
| spring.ai.ollama.chat.options.typical-p | - | 1.0
142143
| spring.ai.ollama.chat.options.repeat-last-n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | 64

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/embeddings/ollama-embeddings.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ The remaining `options` properties are based on the link:https://github.com/olla
142142
| spring.ai.ollama.embedding.options.num-predict | Maximum number of tokens to predict when generating text. (-1 = infinite generation, -2 = fill context) | -1
143143
| spring.ai.ollama.embedding.options.top-k | Reduces the probability of generating nonsense. A higher value (e.g., 100) will give more diverse answers, while a lower value (e.g., 10) will be more conservative. | 40
144144
| spring.ai.ollama.embedding.options.top-p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. | 0.9
145+
| spring.ai.ollama.embedding.options.min-p | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. | 0.0
145146
| spring.ai.ollama.embedding.options.tfs-z | Tail-free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. | 1.0
146147
| spring.ai.ollama.embedding.options.typical-p | - | 1.0
147148
| spring.ai.ollama.embedding.options.repeat-last-n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | 64

0 commit comments

Comments (0)