Propagate reactive Context to AsyncMcpToolCallback

Kehrlann · tzolov · commit 9a521827277f · 2025-07-01T12:33:23.000+02:00
- When calling tools while using ChatModel#stream, store the reactive
  context in a thread-local, so it can be used by downstream reactive
  tools.
- In AsyncMcpToolCallback, restore the reactive context so it can be
  accessed by the tool. This will be useful for Spring Security OAuth2
  support in reactive scenarios, because it relies on the context.

Signed-off-by: Daniel Garnier-Moiroux <git@garnier.wf>
diff --git a/mcp/common/src/main/java/org/springframework/ai/mcp/AsyncMcpToolCallback.java b/mcp/common/src/main/java/org/springframework/ai/mcp/AsyncMcpToolCallback.java
@@ -23,6 +23,7 @@
 
 import org.springframework.ai.chat.model.ToolContext;
 import org.springframework.ai.model.ModelOptionsUtils;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.tool.ToolCallback;
 import org.springframework.ai.tool.definition.DefaultToolDefinition;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -120,7 +121,7 @@ public String call(String functionInput) {
 						new IllegalStateException("Error calling tool: " + response.content()));
 			}
 			return ModelOptionsUtils.toJsonString(response.content());
-		}).block();
+		}).contextWrite(ctx -> ctx.putAll(ToolCallReactiveContextHolder.getContext())).block();
 	}
 
 	@Override
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java
@@ -64,6 +64,7 @@
 import org.springframework.ai.content.Media;
 import org.springframework.ai.model.ModelOptionsUtils;
 import org.springframework.ai.model.tool.DefaultToolExecutionEligibilityPredicate;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.model.tool.ToolCallingChatOptions;
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
@@ -265,8 +266,15 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 					if (chatResponse.hasFinishReasons(Set.of("tool_use"))) {
 						// FIXME: bounded elastic needs to be used since tool calling
 						//  is currently only synchronous
-						return Flux.defer(() -> {
-							var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+						return Flux.deferContextual((ctx) -> {
+							// TODO: factor out the tool execution logic with setting context into a utility.
+							ToolExecutionResult toolExecutionResult;
+							try {
+								ToolCallReactiveContextHolder.setContext(ctx);
+								toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+							} finally {
+								ToolCallReactiveContextHolder.clearContext();
+							}
 							if (toolExecutionResult.returnDirect()) {
 								// Return tool execution result directly to the client.
 								return Flux.just(ChatResponse.builder().from(chatResponse)
@@ -279,6 +287,7 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 										chatResponse);
 							}
 						}).subscribeOn(Schedulers.boundedElastic());
+
 					} else {						
 						return Mono.empty();
 					}
diff --git a/models/spring-ai-anthropic/src/test/resources/application-logging-test.properties b/models/spring-ai-anthropic/src/test/resources/application-logging-test.properties
@@ -16,4 +16,4 @@
 
 logging.level.org.springframework.ai.chat.client.advisor=DEBUG
 
-logging.level.org.springframework.ai.anthropic.api.AnthropicApi=DEBUG
+logging.level.org.springframework.ai.anthropic.api.AnthropicApi=INFO
diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java
@@ -95,6 +95,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.observation.conventions.AiProvider;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -380,8 +381,15 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), chatResponse)) {
 					// FIXME: bounded elastic needs to be used since tool calling
 					// is currently only synchronous
-					return Flux.defer(() -> {
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+						}
+						finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
 							return Flux.just(ChatResponse.builder()
diff --git a/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java b/models/spring-ai-bedrock-converse/src/main/java/org/springframework/ai/bedrock/converse/BedrockProxyChatModel.java
@@ -101,6 +101,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.observation.conventions.AiProvider;
 import org.springframework.ai.tool.definition.ToolDefinition;
 import org.springframework.util.Assert;
@@ -681,8 +682,15 @@ private Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse perviousCh
 
 					// FIXME: bounded elastic needs to be used since tool calling
 					// is currently only synchronous
-					return Flux.defer(() -> {
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, chatResponse);
+						}
+						finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
diff --git a/models/spring-ai-deepseek/src/main/java/org/springframework/ai/deepseek/DeepSeekChatModel.java b/models/spring-ai-deepseek/src/main/java/org/springframework/ai/deepseek/DeepSeekChatModel.java
@@ -62,6 +62,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -286,10 +287,16 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 			// @formatter:off
 			Flux<ChatResponse> flux = chatResponse.flatMap(response -> {
 				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
-					return Flux.defer(() -> {
-						// FIXME: bounded elastic needs to be used since tool calling
-						//  is currently only synchronous
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+					// FIXME: bounded elastic needs to be used since tool calling
+					//  is currently only synchronous
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+						} finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
 							return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-minimax/src/main/java/org/springframework/ai/minimax/MiniMaxChatModel.java b/models/spring-ai-minimax/src/main/java/org/springframework/ai/minimax/MiniMaxChatModel.java
@@ -65,6 +65,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.tool.definition.ToolDefinition;
 import org.springframework.http.ResponseEntity;
@@ -370,10 +371,16 @@ public Flux<ChatResponse> stream(Prompt prompt) {
 
 			Flux<ChatResponse> flux = chatResponse.flatMap(response -> {
 						if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(requestPrompt.getOptions(), response)) {
-							return Flux.defer(() -> {
-								// FIXME: bounded elastic needs to be used since tool calling
-								//  is currently only synchronous
-								var toolExecutionResult = this.toolCallingManager.executeToolCalls(requestPrompt, response);
+							// FIXME: bounded elastic needs to be used since tool calling
+							//  is currently only synchronous
+							return Flux.deferContextual((ctx) -> {
+								ToolExecutionResult toolExecutionResult;
+								try {
+									ToolCallReactiveContextHolder.setContext(ctx);
+									toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+								} finally {
+									ToolCallReactiveContextHolder.clearContext();
+								}
 								if (toolExecutionResult.returnDirect()) {
 									// Return tool execution result directly to the client.
 									return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/MistralAiChatModel.java b/models/spring-ai-mistral-ai/src/main/java/org/springframework/ai/mistralai/MistralAiChatModel.java
@@ -64,6 +64,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -316,8 +317,14 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
 					// FIXME: bounded elastic needs to be used since tool calling
 					//  is currently only synchronous
-					return Flux.defer(() -> {
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+						} finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
 							return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java
@@ -54,6 +54,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.ollama.api.OllamaApi;
 import org.springframework.ai.ollama.api.OllamaApi.ChatRequest;
 import org.springframework.ai.ollama.api.OllamaApi.Message.Role;
@@ -351,8 +352,14 @@ private Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCh
 				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
 					// FIXME: bounded elastic needs to be used since tool calling
 					//  is currently only synchronous
-					return Flux.defer(() -> {
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+						} finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
 							return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatModel.java
@@ -61,6 +61,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.openai.api.OpenAiApi;
 import org.springframework.ai.openai.api.OpenAiApi.ChatCompletion;
 import org.springframework.ai.openai.api.OpenAiApi.ChatCompletion.Choice;
@@ -363,10 +364,16 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 			// @formatter:off
 			Flux<ChatResponse> flux = chatResponse.flatMap(response -> {
 				if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
-					return Flux.defer(() -> {
-						// FIXME: bounded elastic needs to be used since tool calling
-						//  is currently only synchronous
-						var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+					// FIXME: bounded elastic needs to be used since tool calling
+					//  is currently only synchronous
+					return Flux.deferContextual((ctx) -> {
+						ToolExecutionResult toolExecutionResult;
+						try {
+							ToolCallReactiveContextHolder.setContext(ctx);
+							toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+						} finally {
+							ToolCallReactiveContextHolder.clearContext();
+						}
 						if (toolExecutionResult.returnDirect()) {
 							// Return tool execution result directly to the client.
 							return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-vertex-ai-gemini/src/main/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModel.java b/models/spring-ai-vertex-ai-gemini/src/main/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModel.java
@@ -81,6 +81,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.support.UsageCalculator;
 import org.springframework.ai.tool.definition.ToolDefinition;
@@ -540,9 +541,15 @@ public Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCha
 				Flux<ChatResponse> flux = chatResponseFlux.flatMap(response -> {
 					if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(prompt.getOptions(), response)) {
 						// FIXME: bounded elastic needs to be used since tool calling
-						// is currently only synchronous
-						return Flux.defer(() -> {
-							var toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+						//  is currently only synchronous
+						return Flux.deferContextual((ctx) -> {
+							ToolExecutionResult toolExecutionResult;
+							try {
+								ToolCallReactiveContextHolder.setContext(ctx);
+								toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+							} finally {
+								ToolCallReactiveContextHolder.clearContext();
+							}
 							if (toolExecutionResult.returnDirect()) {
 								// Return tool execution result directly to the client.
 								return Flux.just(ChatResponse.builder().from(response)
diff --git a/models/spring-ai-zhipuai/src/main/java/org/springframework/ai/zhipuai/ZhiPuAiChatModel.java b/models/spring-ai-zhipuai/src/main/java/org/springframework/ai/zhipuai/ZhiPuAiChatModel.java
@@ -56,6 +56,7 @@
 import org.springframework.ai.model.tool.ToolCallingManager;
 import org.springframework.ai.model.tool.ToolExecutionEligibilityPredicate;
 import org.springframework.ai.model.tool.ToolExecutionResult;
+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;
 import org.springframework.ai.retry.RetryUtils;
 import org.springframework.ai.tool.definition.ToolDefinition;
 import org.springframework.ai.zhipuai.api.ZhiPuAiApi;
@@ -357,10 +358,16 @@ public Flux<ChatResponse> stream(Prompt prompt) {
 			// @formatter:off
 			Flux<ChatResponse> flux = chatResponse.flatMap(response -> {
 						if (this.toolExecutionEligibilityPredicate.isToolExecutionRequired(requestPrompt.getOptions(), response)) {
-							return Flux.defer(() -> {
-								// FIXME: bounded elastic needs to be used since tool calling
-								//  is currently only synchronous
-								var toolExecutionResult = this.toolCallingManager.executeToolCalls(requestPrompt, response);
+							// FIXME: bounded elastic needs to be used since tool calling
+							//  is currently only synchronous
+							return Flux.deferContextual((ctx) -> {
+								ToolExecutionResult toolExecutionResult;
+								try {
+									ToolCallReactiveContextHolder.setContext(ctx);
+									toolExecutionResult = this.toolCallingManager.executeToolCalls(prompt, response);
+								} finally {
+									ToolCallReactiveContextHolder.clearContext();
+								}
 								if (toolExecutionResult.returnDirect()) {
 									// Return tool execution result directly to the client.
 									return Flux.just(ChatResponse.builder().from(response)
diff --git a/spring-ai-model/src/main/java/org/springframework/ai/model/tool/internal/ToolCallReactiveContextHolder.java b/spring-ai-model/src/main/java/org/springframework/ai/model/tool/internal/ToolCallReactiveContextHolder.java
@@ -0,0 +1,30 @@
+package org.springframework.ai.model.tool.internal;
+
+import reactor.util.context.Context;
+import reactor.util.context.ContextView;
+
+/**
+ * Bridges blocking tool calls and the Reactor context. The caller stores the current
+ * {@link ContextView} in a thread-local before invoking tools, so that a nested reactive
+ * call on the same thread can re-inject it; the caller must clear it afterwards.
+ *
+ * @author Daniel Garnier-Moiroux
+ * @since 1.1.0
+ */
+public class ToolCallReactiveContextHolder {
+
+	private static final ThreadLocal<ContextView> context = ThreadLocal.withInitial(Context::empty);
+
+	public static void setContext(ContextView contextView) {
+		context.set(contextView);
+	}
+
+	public static ContextView getContext() {
+		return context.get();
+	}
+
+	public static void clearContext() {
+		context.remove();
+	}
+
+}

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`
`24`	`24`	`import org.springframework.ai.chat.model.ToolContext;`
`25`	`25`	`import org.springframework.ai.model.ModelOptionsUtils;`
	`26`	`+import org.springframework.ai.model.tool.internal.ToolCallReactiveContextHolder;`
`26`	`27`	`import org.springframework.ai.tool.ToolCallback;`
`27`	`28`	`import org.springframework.ai.tool.definition.DefaultToolDefinition;`
`28`	`29`	`import org.springframework.ai.tool.definition.ToolDefinition;`
`@@ -120,7 +121,7 @@ public String call(String functionInput) {`
`120`	`121`	`new IllegalStateException("Error calling tool: " + response.content()));`
`121`	`122`	`}`
`122`	`123`	`return ModelOptionsUtils.toJsonString(response.content());`
`123`		`- }).block();`
	`124`	`+ }).contextWrite(ctx -> ctx.putAll(ToolCallReactiveContextHolder.getContext())).block();`
`124`	`125`	`}`
`125`	`126`
`126`	`127`	`@Override`
Original file line number	Diff line number	Diff line change
`@@ -16,4 +16,4 @@`
`16`	`16`
`17`	`17`	`logging.level.org.springframework.ai.chat.client.advisor=DEBUG`
`18`	`18`
`19`		`-logging.level.org.springframework.ai.anthropic.api.AnthropicApi=DEBUG`
	`19`	`+logging.level.org.springframework.ai.anthropic.api.AnthropicApi=INFO`