Commit beb8c23

add reasoning model support
1 parent 57f60f9 commit beb8c23

File tree: 5 files changed, 203 additions & 0 deletions


async-openai-wasm/README.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -36,6 +36,7 @@ a `x.y.z` version.
 - [x] Realtime (Beta) (partially implemented)
 - [x] Uploads
 - [x] **WASM support**
+- [x] Reasoning Model Support: support models like DeepSeek R1 via broader support for OpenAI-compatible endpoints, see `examples/reasoning`
 - SSE streaming on available APIs
 - Ergonomic builder pattern for all request objects.
 - Microsoft Azure OpenAI Service (only for APIs matching OpenAI spec)
@@ -49,6 +50,8 @@ maintain parity with spec of AOS. Just like `async-openai`.
 + * WASM support
 + * WASM examples
 + * Realtime API: Does not bundle with a specific WS implementation. Need to convert a client event into a WS message by yourself, which is just simple `your_ws_impl::Message::Text(some_client_event.into_text())`
++ * Broader support for OpenAI-compatible Endpoints
++ * Reasoning Model Support
 - * Tokio
 - * Non-wasm examples: please refer to the original project [async-openai](https://github.com/64bit/async-openai/).
 - * Builtin backoff retries: due to [this issue](https://github.com/ihrwein/backoff/issues/61).
```
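The mechanism behind the new README bullets is the pair of type changes in `async-openai-wasm/src/types/chat.rs` below: the chat request gains a flattened `extra_params` field for provider-specific options, and the response message, full response, stream delta, and stream response types gain flattened `return_catchall` fields that retain any keys the OpenAI spec does not define.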

async-openai-wasm/src/types/chat.rs

Lines changed: 24 additions & 0 deletions
```diff
@@ -430,6 +430,11 @@ pub struct ChatCompletionResponseMessage {
 
     /// If the audio output modality is requested, this object contains data about the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio).
     pub audio: Option<ChatCompletionResponseMessageAudio>,
+
+    /// Catching anything else that a provider wants to provide, for example, a `reasoning` field
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub return_catchall: Option<serde_json::Value>,
 }
 
 #[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
@@ -816,6 +821,10 @@ pub struct CreateChatCompletionRequest {
     #[deprecated]
     #[serde(skip_serializing_if = "Option::is_none")]
     pub functions: Option<Vec<ChatCompletionFunctions>>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub extra_params: Option<serde_json::Value>,
 }
 
 /// Options for streaming response. Only set this when you set `stream: true`.
@@ -899,6 +908,11 @@ pub struct CreateChatCompletionResponse {
     /// The object type, which is always `chat.completion`.
     pub object: String,
     pub usage: Option<CompletionUsage>,
+
+    /// Catching anything else that a provider wants to provide
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub return_catchall: Option<serde_json::Value>,
 }
 
 /// Parsed server side events stream until an \[DONE\] is received from server.
@@ -939,6 +953,11 @@ pub struct ChatCompletionStreamResponseDelta {
     pub role: Option<Role>,
     /// The refusal message generated by the model.
     pub refusal: Option<String>,
+
+    /// Catching anything else that a provider wants to provide, for example, a `reasoning` field
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub return_catchall: Option<serde_json::Value>,
 }
 
 #[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
@@ -984,4 +1003,9 @@ pub struct CreateChatCompletionStreamResponse {
     /// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
     /// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
     pub usage: Option<CompletionUsage>,
+
+    /// Catching anything else that a provider wants to provide
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub return_catchall: Option<serde_json::Value>,
 }
```
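The pattern is easiest to see in isolation. Below is a minimal, self-contained sketch (not part of the commit; `Request` and `Message` are hypothetical stand-ins that only mirror the flattened fields added above) showing both directions: `extra_params` merging into the request body on serialization, and unknown provider keys collecting into `return_catchall` on deserialization.

```rust
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};

#[derive(Serialize)]
struct Request {
    model: String,
    // Mirrors `CreateChatCompletionRequest::extra_params`: extra keys are
    // merged into the top level of the request body, not nested.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(flatten)]
    extra_params: Option<Value>,
}

#[derive(Deserialize)]
struct Message {
    content: String,
    // Mirrors `ChatCompletionResponseMessage::return_catchall`: any keys this
    // struct does not declare are collected here on deserialization.
    #[serde(flatten)]
    return_catchall: Option<Value>,
}

fn main() {
    let req = Request {
        model: "deepseek/deepseek-r1".into(),
        extra_params: Some(json!({ "include_reasoning": true })),
    };
    // `include_reasoning` ends up beside `model` in the serialized body.
    assert_eq!(
        serde_json::to_value(&req).unwrap(),
        json!({ "model": "deepseek/deepseek-r1", "include_reasoning": true })
    );

    // A provider-specific field such as DeepSeek's `reasoning_content`
    // survives deserialization inside the catchall.
    let msg: Message = serde_json::from_value(json!({
        "content": "Hello!",
        "reasoning_content": "The user greeted me."
    }))
    .unwrap();
    assert_eq!(msg.content, "Hello!");
    let catchall = msg.return_catchall.unwrap();
    assert_eq!(catchall["reasoning_content"], "The user greeted me.");
}
```

The trade-off of flattening into `Option<serde_json::Value>` is that no per-provider variants are needed: any extension key round-trips untyped, at the cost of callers fishing values out of JSON by hand, as the example and tests below do with `reasoning`/`reasoning_content`.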
Lines changed: 101 additions & 0 deletions
```rust
use async_openai_wasm::config::OpenAIConfig;
use async_openai_wasm::types::{
    ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
};
use async_openai_wasm::Client;
use futures::StreamExt;
use serde_json::json;

const OPENROUTER_REASONING_KEY: &str = "reasoning";
const OPENROUTER_BASEURL: &str = "https://openrouter.ai/api/v1";
const DEEPSEEK_REASONING_KEY: &str = "reasoning_content";
const DEEPSEEK_BASEURL: &str = "https://api.deepseek.com";

#[tokio::test]
async fn test_chat_completion_reasoning() {
    let test_key = std::env::var("TEST_API_KEY").unwrap();
    let use_deepseek = std::env::var("USE_DEEPSEEK").is_ok();
    let (reasoning_key, base_url) = if use_deepseek {
        (DEEPSEEK_REASONING_KEY, DEEPSEEK_BASEURL)
    } else {
        (OPENROUTER_REASONING_KEY, OPENROUTER_BASEURL)
    };
    let client = Client::with_config(
        OpenAIConfig::new()
            .with_api_base(base_url)
            .with_api_key(test_key),
    );
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestUserMessageArgs::default()
            .content("Hello! Do you know the Rust programming language?")
            .build()
            .unwrap()
            .into()])
        .model("deepseek/deepseek-r1")
        // The extra params that OpenRouter requires to get reasoning content
        // See https://openrouter.ai/docs/api-reference/parameters#include-reasoning
        .extra_params(json!({
            "include_reasoning" : true
        }))
        .build()
        .unwrap();
    let result = client.chat().create(request).await.unwrap();
    // Get the reasoning field in the response
    let catch_all_result = result.choices[0].message.return_catchall.as_ref().unwrap();
    let reasoning = catch_all_result
        .get(reasoning_key)
        .unwrap()
        .as_str()
        .unwrap();
    assert!(reasoning.len() > 0);
    println!("Reasoning: {reasoning}");
}

#[tokio::test]
async fn test_chat_completion_reasoning_stream() {
    let test_key = std::env::var("TEST_API_KEY").unwrap();
    let use_deepseek = std::env::var("USE_DEEPSEEK").is_ok();
    let (reasoning_key, base_url) = if use_deepseek {
        (DEEPSEEK_REASONING_KEY, DEEPSEEK_BASEURL)
    } else {
        (OPENROUTER_REASONING_KEY, OPENROUTER_BASEURL)
    };
    let client = Client::with_config(
        OpenAIConfig::new()
            .with_api_base(base_url)
            .with_api_key(test_key),
    );
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestUserMessageArgs::default()
            .content("Hello! Do you know the Rust programming language?")
            .build()
            .unwrap()
            .into()])
        .model("deepseek/deepseek-r1")
        // The extra params that OpenRouter requires to get reasoning content
        // See https://openrouter.ai/docs/api-reference/parameters#include-reasoning
        .extra_params(json!({
            "include_reasoning" : true
        }))
        .build()
        .unwrap();

    let mut result = client.chat().create_stream(request).await.unwrap();
    let mut reasoning = String::new();

    while let Some(result) = result.next().await {
        if let Ok(r) = result {
            // Get the reasoning field in the response
            let catch_all_return = r.choices[0].delta.return_catchall.as_ref();
            let reasoning_part = catch_all_return
                .and_then(|val| val.get(reasoning_key))
                .and_then(|r| r.as_str());
            if let Some(reasoning_part) = reasoning_part {
                reasoning.push_str(reasoning_part);
                println!("Reasoning Part: {reasoning_part}")
            }
        }
    }
    assert!(reasoning.len() > 0);
    println!("Reasoning:\n{reasoning}");
}
```
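Note that both tests hit live endpoints rather than fixtures: they read `TEST_API_KEY` from the environment and call OpenRouter by default, or DeepSeek when `USE_DEEPSEEK` is set, so they need network access and a valid key for whichever provider is targeted.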

examples/reasoning/Cargo.toml

Lines changed: 13 additions & 0 deletions
```toml
[package]
name = "reasoning-example"
version = "0.1.0"
edition = "2021"
publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
async-openai-wasm = { path = "../../async-openai-wasm", features = ["realtime"] }
serde_json = "1.0.135"
futures = "0.3"
tokio = { version = "1.43", features = ["fs", "macros"] }
```

examples/reasoning/src/main.rs

Lines changed: 62 additions & 0 deletions
```rust
use async_openai_wasm::config::OpenAIConfig;
use async_openai_wasm::types::{
    ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs,
};
use async_openai_wasm::Client;
use futures::StreamExt;
use serde_json::json;

const OPENROUTER_REASONING_KEY: &str = "reasoning";
const OPENROUTER_BASEURL: &str = "https://openrouter.ai/api/v1";
const DEEPSEEK_REASONING_KEY: &str = "reasoning_content";
const DEEPSEEK_BASEURL: &str = "https://api.deepseek.com";

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let test_key = std::env::var("TEST_API_KEY").unwrap();
    let use_deepseek = std::env::var("USE_DEEPSEEK").is_ok();
    let (reasoning_key, base_url) = if use_deepseek {
        (DEEPSEEK_REASONING_KEY, DEEPSEEK_BASEURL)
    } else {
        (OPENROUTER_REASONING_KEY, OPENROUTER_BASEURL)
    };
    let client = Client::with_config(
        OpenAIConfig::new()
            .with_api_base(base_url)
            .with_api_key(test_key),
    );
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestUserMessageArgs::default()
            .content("Hello! Do you know the Rust programming language?")
            .build()
            .unwrap()
            .into()])
        .model("deepseek/deepseek-r1")
        // The extra params that OpenRouter requires to get reasoning content
        // See https://openrouter.ai/docs/api-reference/parameters#include-reasoning
        .extra_params(json!({
            "include_reasoning" : true
        }))
        .build()
        .unwrap();

    let mut result = client.chat().create_stream(request).await.unwrap();
    let mut reasoning = String::new();

    while let Some(result) = result.next().await {
        if let Ok(r) = result {
            // Get the reasoning field in the response
            let catch_all_return = r.choices[0].delta.return_catchall.as_ref();
            let reasoning_part = catch_all_return
                .and_then(|val| val.get(reasoning_key))
                .and_then(|r| r.as_str());
            if let Some(reasoning_part) = reasoning_part {
                reasoning.push_str(reasoning_part);
                println!("Reasoning Part: {reasoning_part}")
            }
        }
    }
    assert!(reasoning.len() > 0);
    println!("Reasoning:\n{reasoning}");
}
```
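Presumably the example is run from `examples/reasoning` with something like `TEST_API_KEY=<your-key> cargo run`, adding `USE_DEEPSEEK=1` to target DeepSeek instead of OpenRouter; the key must belong to whichever provider is selected.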
