Commit 5905fbb

Allow Anthropic custom models to override temperature (#18160)
Release Notes:

- Allow Anthropic custom models to override "temperature"

This also centralized the defaulting of "temperature" to be inside of each model's `into_x` call instead of being sprinkled around the code.
1 parent 7d62fda commit 5905fbb
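
For context, the new `default_temperature` field comes from a user-configured custom model entry. A minimal sketch of what such an entry could deserialize into, assuming a serde-based settings schema whose field names mirror the `AvailableModel` struct in this diff (the JSON shape and free-standing struct here are illustrative, not taken from the commit):

use serde::Deserialize;

// Field names mirror the AvailableModel additions in this commit; the JSON
// shape below is an illustrative assumption, not the exact Zed settings schema.
#[derive(Debug, Deserialize)]
struct AvailableModel {
    name: String,
    max_output_tokens: Option<u32>,
    default_temperature: Option<f32>,
}

fn main() {
    let entry: AvailableModel = serde_json::from_str(
        r#"{ "name": "my-claude-variant", "default_temperature": 0.2 }"#,
    )
    .unwrap();
    assert_eq!(entry.name, "my-claude-variant");
    // An entry that omits the field leaves the Option as None, so providers
    // fall back to the built-in default of 1.0 (see default_temperature() below).
    assert_eq!(entry.default_temperature, Some(0.2));
}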

File tree

12 files changed (+54 −17 lines changed)


crates/anthropic/src/anthropic.rs

Lines changed: 14 additions & 0 deletions
@@ -49,6 +49,7 @@ pub enum Model {
         /// Indicates whether this custom model supports caching.
         cache_configuration: Option<AnthropicModelCacheConfiguration>,
         max_output_tokens: Option<u32>,
+        default_temperature: Option<f32>,
     },
 }

@@ -124,6 +125,19 @@ impl Model {
         }
     }

+    pub fn default_temperature(&self) -> f32 {
+        match self {
+            Self::Claude3_5Sonnet
+            | Self::Claude3Opus
+            | Self::Claude3Sonnet
+            | Self::Claude3Haiku => 1.0,
+            Self::Custom {
+                default_temperature,
+                ..
+            } => default_temperature.unwrap_or(1.0),
+        }
+    }
+
     pub fn tool_model_id(&self) -> &str {
         if let Self::Custom {
             tool_override: Some(tool_override),
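
With the request side now carrying `Option<f32>` (see crates/language_model/src/request.rs below), the precedence this new method establishes can be stated in one place. A minimal standalone sketch, with an illustrative function name that is not part of the commit:

// Precedence sketch: an explicit per-request temperature wins, then the
// custom model's configured default, then Anthropic's baseline of 1.0.
fn effective_temperature(requested: Option<f32>, model_default: Option<f32>) -> f32 {
    requested.or(model_default).unwrap_or(1.0)
}

fn main() {
    assert_eq!(effective_temperature(None, None), 1.0); // built-in models
    assert_eq!(effective_temperature(None, Some(0.2)), 0.2); // custom model default
    assert_eq!(effective_temperature(Some(0.7), Some(0.2)), 0.7); // request overrides
}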

crates/assistant/src/context.rs

Lines changed: 1 addition & 1 deletion
@@ -2180,7 +2180,7 @@ impl Context {
             messages: Vec::new(),
             tools: Vec::new(),
             stop: Vec::new(),
-            temperature: 1.0,
+            temperature: None,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {

crates/assistant/src/inline_assistant.rs

Lines changed: 1 addition & 1 deletion
@@ -2732,7 +2732,7 @@ impl CodegenAlternative {
             messages,
             tools: Vec::new(),
             stop: Vec::new(),
-            temperature: 1.,
+            temperature: None,
         })
     }

crates/assistant/src/prompt_library.rs

Lines changed: 1 addition & 1 deletion
@@ -796,7 +796,7 @@ impl PromptLibrary {
                     }],
                     tools: Vec::new(),
                     stop: Vec::new(),
-                    temperature: 1.,
+                    temperature: None,
                 },
                 cx,
             )

crates/assistant/src/slash_command/auto_command.rs

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ async fn commands_for_summaries(
         }],
         tools: Vec::new(),
         stop: Vec::new(),
-        temperature: 1.0,
+        temperature: None,
     };

     while let Some(current_summaries) = stack.pop() {

crates/assistant/src/terminal_inline_assistant.rs

Lines changed: 1 addition & 1 deletion
@@ -284,7 +284,7 @@ impl TerminalInlineAssistant {
             messages,
             tools: Vec::new(),
             stop: Vec::new(),
-            temperature: 1.0,
+            temperature: None,
         })
     }

crates/language_model/src/provider/anthropic.rs

Lines changed: 8 additions & 2 deletions
@@ -51,6 +51,7 @@ pub struct AvailableModel {
     /// Configuration of Anthropic's caching API.
     pub cache_configuration: Option<LanguageModelCacheConfiguration>,
     pub max_output_tokens: Option<u32>,
+    pub default_temperature: Option<f32>,
 }

 pub struct AnthropicLanguageModelProvider {

@@ -200,6 +201,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                         }
                     }),
                     max_output_tokens: model.max_output_tokens,
+                    default_temperature: model.default_temperature,
                 },
             );
         }

@@ -375,8 +377,11 @@ impl LanguageModel for AnthropicModel {
         request: LanguageModelRequest,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
-        let request =
-            request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
+        let request = request.into_anthropic(
+            self.model.id().into(),
+            self.model.default_temperature(),
+            self.model.max_output_tokens(),
+        );
         let request = self.stream_completion(request, cx);
         let future = self.request_limiter.stream(async move {
             let response = request.await.map_err(|err| anyhow!(err))?;

@@ -405,6 +410,7 @@
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
         let mut request = request.into_anthropic(
             self.model.tool_model_id().into(),
+            self.model.default_temperature(),
             self.model.max_output_tokens(),
         );
         request.tool_choice = Some(anthropic::ToolChoice::Tool {

crates/language_model/src/provider/cloud.rs

Lines changed: 13 additions & 3 deletions
@@ -87,6 +87,8 @@ pub struct AvailableModel {
     pub tool_override: Option<String>,
     /// Indicates whether this custom model supports caching.
     pub cache_configuration: Option<LanguageModelCacheConfiguration>,
+    /// The default temperature to use for this model.
+    pub default_temperature: Option<f32>,
 }

 pub struct CloudLanguageModelProvider {

@@ -255,6 +257,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                         min_total_token: config.min_total_token,
                     }
                 }),
+                default_temperature: model.default_temperature,
                 max_output_tokens: model.max_output_tokens,
             }),
             AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {

@@ -516,7 +519,11 @@ impl LanguageModel for CloudLanguageModel {

         match &self.model {
             CloudModel::Anthropic(model) => {
-                let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
+                let request = request.into_anthropic(
+                    model.id().into(),
+                    model.default_temperature(),
+                    model.max_output_tokens(),
+                );
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
                 let future = self.request_limiter.stream(async move {

@@ -642,8 +649,11 @@

         match &self.model {
             CloudModel::Anthropic(model) => {
-                let mut request =
-                    request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
+                let mut request = request.into_anthropic(
+                    model.tool_model_id().into(),
+                    model.default_temperature(),
+                    model.max_output_tokens(),
+                );
                 request.tool_choice = Some(anthropic::ToolChoice::Tool {
                     name: tool_name.clone(),
                 });

crates/language_model/src/provider/ollama.rs

Lines changed: 1 addition & 1 deletion
@@ -235,7 +235,7 @@ impl OllamaLanguageModel {
             options: Some(ChatOptions {
                 num_ctx: Some(self.model.max_tokens),
                 stop: Some(request.stop),
-                temperature: Some(request.temperature),
+                temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
             tools: vec![],

crates/language_model/src/request.rs

Lines changed: 10 additions & 5 deletions
@@ -236,7 +236,7 @@ pub struct LanguageModelRequest {
     pub messages: Vec<LanguageModelRequestMessage>,
     pub tools: Vec<LanguageModelRequestTool>,
     pub stop: Vec<String>,
-    pub temperature: f32,
+    pub temperature: Option<f32>,
 }

 impl LanguageModelRequest {

@@ -262,7 +262,7 @@ impl LanguageModelRequest {
                 .collect(),
             stream,
             stop: self.stop,
-            temperature: self.temperature,
+            temperature: self.temperature.unwrap_or(1.0),
             max_tokens: max_output_tokens,
             tools: Vec::new(),
             tool_choice: None,

@@ -290,15 +290,20 @@
                 candidate_count: Some(1),
                 stop_sequences: Some(self.stop),
                 max_output_tokens: None,
-                temperature: Some(self.temperature as f64),
+                temperature: self.temperature.map(|t| t as f64).or(Some(1.0)),
                 top_p: None,
                 top_k: None,
             }),
             safety_settings: None,
         }
     }

-    pub fn into_anthropic(self, model: String, max_output_tokens: u32) -> anthropic::Request {
+    pub fn into_anthropic(
+        self,
+        model: String,
+        default_temperature: f32,
+        max_output_tokens: u32,
+    ) -> anthropic::Request {
         let mut new_messages: Vec<anthropic::Message> = Vec::new();
         let mut system_message = String::new();

@@ -400,7 +405,7 @@
             tool_choice: None,
             metadata: None,
             stop_sequences: Vec::new(),
-            temperature: Some(self.temperature),
+            temperature: self.temperature.or(Some(default_temperature)),
             top_k: None,
             top_p: None,
         }
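
Each `into_x` conversion above normalizes the `Option<f32>` for its own wire format, which is the centralization the release notes describe. A self-contained restatement of just that defaulting logic (the free functions here are illustrative; in the commit the logic lives inline in each conversion):

// OpenAI's request type takes a plain f32, so a missing value becomes 1.0.
fn openai_temperature(requested: Option<f32>) -> f32 {
    requested.unwrap_or(1.0)
}

// Google's request type takes Option<f64>; a missing value still maps to Some(1.0).
fn google_temperature(requested: Option<f32>) -> Option<f64> {
    requested.map(|t| t as f64).or(Some(1.0))
}

// Anthropic's conversion falls back to the model's own default, which is
// what lets a custom model override the baseline.
fn anthropic_temperature(requested: Option<f32>, default_temperature: f32) -> Option<f32> {
    requested.or(Some(default_temperature))
}

fn main() {
    assert_eq!(openai_temperature(None), 1.0);
    assert_eq!(google_temperature(None), Some(1.0));
    assert_eq!(anthropic_temperature(None, 0.5), Some(0.5));
    assert_eq!(anthropic_temperature(Some(0.9), 0.5), Some(0.9));
}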
