Skip to content

Commit 0690748

Browse files
authored
Don't send sampling settings like temperature and top_p to OpenAI reasoning models (#1956)
1 parent a1259fe commit 0690748

File tree

6 files changed

+223
-11
lines changed

6 files changed

+223
-11
lines changed

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@ async def _completions_create(
281281

282282
openai_messages = await self._map_messages(messages)
283283

284+
sampling_settings = (
285+
model_settings
286+
if OpenAIModelProfile.from_profile(self.profile).openai_supports_sampling_settings
287+
else OpenAIModelSettings()
288+
)
289+
284290
try:
285291
extra_headers = model_settings.get('extra_headers', {})
286292
extra_headers.setdefault('User-Agent', get_user_agent())
@@ -294,18 +300,18 @@ async def _completions_create(
294300
stream_options={'include_usage': True} if stream else NOT_GIVEN,
295301
stop=model_settings.get('stop_sequences', NOT_GIVEN),
296302
max_completion_tokens=model_settings.get('max_tokens', NOT_GIVEN),
297-
temperature=model_settings.get('temperature', NOT_GIVEN),
298-
top_p=model_settings.get('top_p', NOT_GIVEN),
299303
timeout=model_settings.get('timeout', NOT_GIVEN),
300304
seed=model_settings.get('seed', NOT_GIVEN),
301-
presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
302-
frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
303-
logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
304305
reasoning_effort=model_settings.get('openai_reasoning_effort', NOT_GIVEN),
305-
logprobs=model_settings.get('openai_logprobs', NOT_GIVEN),
306-
top_logprobs=model_settings.get('openai_top_logprobs', NOT_GIVEN),
307306
user=model_settings.get('openai_user', NOT_GIVEN),
308307
service_tier=model_settings.get('openai_service_tier', NOT_GIVEN),
308+
temperature=sampling_settings.get('temperature', NOT_GIVEN),
309+
top_p=sampling_settings.get('top_p', NOT_GIVEN),
310+
presence_penalty=sampling_settings.get('presence_penalty', NOT_GIVEN),
311+
frequency_penalty=sampling_settings.get('frequency_penalty', NOT_GIVEN),
312+
logit_bias=sampling_settings.get('logit_bias', NOT_GIVEN),
313+
logprobs=sampling_settings.get('openai_logprobs', NOT_GIVEN),
314+
top_logprobs=sampling_settings.get('openai_top_logprobs', NOT_GIVEN),
309315
extra_headers=extra_headers,
310316
extra_body=model_settings.get('extra_body'),
311317
)
@@ -664,6 +670,12 @@ async def _responses_create(
664670
instructions, openai_messages = await self._map_messages(messages)
665671
reasoning = self._get_reasoning(model_settings)
666672

673+
sampling_settings = (
674+
model_settings
675+
if OpenAIModelProfile.from_profile(self.profile).openai_supports_sampling_settings
676+
else OpenAIResponsesModelSettings()
677+
)
678+
667679
try:
668680
extra_headers = model_settings.get('extra_headers', {})
669681
extra_headers.setdefault('User-Agent', get_user_agent())
@@ -676,8 +688,8 @@ async def _responses_create(
676688
tool_choice=tool_choice or NOT_GIVEN,
677689
max_output_tokens=model_settings.get('max_tokens', NOT_GIVEN),
678690
stream=stream,
679-
temperature=model_settings.get('temperature', NOT_GIVEN),
680-
top_p=model_settings.get('top_p', NOT_GIVEN),
691+
temperature=sampling_settings.get('temperature', NOT_GIVEN),
692+
top_p=sampling_settings.get('top_p', NOT_GIVEN),
681693
truncation=model_settings.get('openai_truncation', NOT_GIVEN),
682694
timeout=model_settings.get('timeout', NOT_GIVEN),
683695
reasoning=reasoning,

pydantic_ai_slim/pydantic_ai/profiles/openai.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,20 @@ class OpenAIModelProfile(ModelProfile):
1515
ALL FIELDS MUST BE `openai_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.
1616
"""
1717

18-
# This can be set by a provider or user if the OpenAI-"compatible" API doesn't support strict tool definitions
1918
openai_supports_strict_tool_definition: bool = True
19+
"""This can be set by a provider or user if the OpenAI-"compatible" API doesn't support strict tool definitions."""
20+
21+
openai_supports_sampling_settings: bool = True
22+
"""Turn off to avoid sending sampling settings like `temperature` and `top_p` to models that don't support them, like OpenAI's o-series reasoning models."""
2023

2124

2225
def openai_model_profile(model_name: str) -> ModelProfile:
2326
"""Get the model profile for an OpenAI model."""
24-
return OpenAIModelProfile(json_schema_transformer=OpenAIJsonSchemaTransformer)
27+
is_reasoning_model = model_name.startswith('o')
28+
return OpenAIModelProfile(
29+
json_schema_transformer=OpenAIJsonSchemaTransformer,
30+
openai_supports_sampling_settings=not is_reasoning_model,
31+
)
2532

2633

2734
_STRICT_INCOMPATIBLE_KEYS = [
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- application/json
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '106'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.openai.com
16+
method: POST
17+
parsed_body:
18+
messages:
19+
- content: What is the capital of Mexico?
20+
role: user
21+
model: o3-mini
22+
stream: false
23+
uri: https://api.openai.com/v1/chat/completions
24+
response:
25+
headers:
26+
access-control-expose-headers:
27+
- X-Request-ID
28+
alt-svc:
29+
- h3=":443"; ma=86400
30+
connection:
31+
- keep-alive
32+
content-length:
33+
- '944'
34+
content-type:
35+
- application/json
36+
openai-organization:
37+
- pydantic-28gund
38+
openai-processing-ms:
39+
- '5417'
40+
openai-version:
41+
- '2020-10-01'
42+
strict-transport-security:
43+
- max-age=31536000; includeSubDomains; preload
44+
transfer-encoding:
45+
- chunked
46+
parsed_body:
47+
choices:
48+
- finish_reason: stop
49+
index: 0
50+
message:
51+
annotations: []
52+
content: The capital of Mexico is Mexico City. It is not only the seat of the federal government but also a major
53+
cultural, political, and economic center in the country.
54+
refusal: null
55+
role: assistant
56+
created: 1749586227
57+
id: chatcmpl-BgzadlOEkRTXYim8s2TzQfnSpaZ4u
58+
model: o3-mini-2025-01-31
59+
object: chat.completion
60+
service_tier: default
61+
system_fingerprint: fp_e20469f047
62+
usage:
63+
completion_tokens: 238
64+
completion_tokens_details:
65+
accepted_prediction_tokens: 0
66+
audio_tokens: 0
67+
reasoning_tokens: 192
68+
rejected_prediction_tokens: 0
69+
prompt_tokens: 13
70+
prompt_tokens_details:
71+
audio_tokens: 0
72+
cached_tokens: 0
73+
total_tokens: 251
74+
status:
75+
code: 200
76+
message: OK
77+
version: 1
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- application/json
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '103'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.openai.com
16+
method: POST
17+
parsed_body:
18+
input:
19+
- content: What is the capital of Mexico?
20+
role: user
21+
model: o3-mini
22+
stream: false
23+
uri: https://api.openai.com/v1/responses
24+
response:
25+
headers:
26+
alt-svc:
27+
- h3=":443"; ma=86400
28+
connection:
29+
- keep-alive
30+
content-length:
31+
- '1502'
32+
content-type:
33+
- application/json
34+
openai-organization:
35+
- pydantic-28gund
36+
openai-processing-ms:
37+
- '5769'
38+
openai-version:
39+
- '2020-10-01'
40+
strict-transport-security:
41+
- max-age=31536000; includeSubDomains; preload
42+
transfer-encoding:
43+
- chunked
44+
parsed_body:
45+
background: false
46+
created_at: 1749586308
47+
error: null
48+
id: resp_684891844d0481a28f2d4ed6ea21aa8a0a1b41b457712f86
49+
incomplete_details: null
50+
instructions: null
51+
max_output_tokens: null
52+
metadata: {}
53+
model: o3-mini-2025-01-31
54+
object: response
55+
output:
56+
- id: rs_68489188081c81a29dea5b85122ad5ed0a1b41b457712f86
57+
summary: []
58+
type: reasoning
59+
- content:
60+
- annotations: []
61+
text: The capital of Mexico is Mexico City. It serves as the political, cultural, and economic heart of the country
62+
and is one of the largest metropolitan areas in the world.
63+
type: output_text
64+
id: msg_684891892e2481a2a1a8ae81c0541d800a1b41b457712f86
65+
role: assistant
66+
status: completed
67+
type: message
68+
parallel_tool_calls: true
69+
previous_response_id: null
70+
reasoning:
71+
effort: medium
72+
summary: null
73+
service_tier: default
74+
status: completed
75+
store: true
76+
temperature: 1.0
77+
text:
78+
format:
79+
type: text
80+
tool_choice: auto
81+
tools: []
82+
top_p: 1.0
83+
truncation: disabled
84+
usage:
85+
input_tokens: 13
86+
input_tokens_details:
87+
cached_tokens: 0
88+
output_tokens: 227
89+
output_tokens_details:
90+
reasoning_tokens: 192
91+
total_tokens: 240
92+
user: null
93+
status:
94+
code: 200
95+
message: OK
96+
version: 1

tests/models/test_openai.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,6 +1617,16 @@ async def test_openai_instructions_with_logprobs(allow_model_requests: None):
16171617
]
16181618

16191619

1620+
@pytest.mark.vcr()
1621+
async def test_reasoning_model_with_temperature(allow_model_requests: None, openai_api_key: str):
1622+
m = OpenAIModel('o3-mini', provider=OpenAIProvider(api_key=openai_api_key))
1623+
agent = Agent(m, model_settings=OpenAIModelSettings(temperature=0.5))
1624+
result = await agent.run('What is the capital of Mexico?')
1625+
assert result.output == snapshot(
1626+
'The capital of Mexico is Mexico City. It is not only the seat of the federal government but also a major cultural, political, and economic center in the country.'
1627+
)
1628+
1629+
16201630
def test_openai_model_profile():
16211631
m = OpenAIModel('gpt-4o', provider=OpenAIProvider(api_key='foobar'))
16221632
assert isinstance(m.profile, OpenAIModelProfile)

tests/models/test_openai_responses.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,3 +505,13 @@ def test_model_profile_strict_not_supported():
505505
'strict': False,
506506
}
507507
)
508+
509+
510+
@pytest.mark.vcr()
511+
async def test_reasoning_model_with_temperature(allow_model_requests: None, openai_api_key: str):
512+
m = OpenAIResponsesModel('o3-mini', provider=OpenAIProvider(api_key=openai_api_key))
513+
agent = Agent(m, model_settings=OpenAIResponsesModelSettings(temperature=0.5))
514+
result = await agent.run('What is the capital of Mexico?')
515+
assert result.output == snapshot(
516+
'The capital of Mexico is Mexico City. It serves as the political, cultural, and economic heart of the country and is one of the largest metropolitan areas in the world.'
517+
)

0 commit comments

Comments
 (0)