[Feat] Perf fix - ensure deepgram provider uses async httpx calls (#11641)

ishaan-jaff · cursoragent · web-flow · commit 688e65b2b266 · 2025-06-11T18:32:01.000-07:00
* Checkpoint before follow-up message

* Add comprehensive tests for Deepgram transcription functionality

* clean up transform

* just use 1 test

* test cleanup

* test fix get_complete_url

* test rename file

* refactor deepgram URL construction

* add logging_obj.pre_call

* fix unused imports

* feat - add async deepgram support

* test_audio_transcription_async

* fix python 3.8 test

---------

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
@@ -212,6 +212,7 @@ async def post(
         stream: bool = False,
         logging_obj: Optional[LiteLLMLoggingObject] = None,
         files: Optional[RequestFiles] = None,
+        content: Any = None,
     ):
         start_time = time.time()
         try:
@@ -227,6 +228,7 @@ async def post(
                 headers=headers,
                 timeout=timeout,
                 files=files,
+                content=content,
             )
             response = await self.client.send(req, stream=stream)
             response.raise_for_status()
@@ -452,14 +454,15 @@ async def single_connection_post_request(
         params: Optional[dict] = None,
         headers: Optional[dict] = None,
         stream: bool = False,
+        content: Any = None,
     ):
         """
         Making POST request for a single connection client.
 
         Used for retrying connection client errors.
         """
         req = client.build_request(
-            "POST", url, data=data, json=json, params=params, headers=headers  # type: ignore
+            "POST", url, data=data, json=json, params=params, headers=headers, content=content  # type: ignore
         )
         response = await client.send(req, stream=stream)
         response.raise_for_status()
diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -982,28 +982,22 @@ async def arerank(
             request_data=request_data,
         )
 
-    def audio_transcriptions(
+    def _prepare_audio_transcription_request(
         self,
         model: str,
         audio_file: FileTypes,
         optional_params: dict,
         litellm_params: dict,
-        model_response: TranscriptionResponse,
-        timeout: float,
-        max_retries: int,
         logging_obj: LiteLLMLoggingObj,
         api_key: Optional[str],
         api_base: Optional[str],
-        custom_llm_provider: str,
-        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
-        atranscription: bool = False,
-        headers: Optional[Dict[str, Any]] = None,
-        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
-    ) -> TranscriptionResponse:
-        if provider_config is None:
-            raise ValueError(
-                f"No provider config found for model: {model} and provider: {custom_llm_provider}"
-            )
+        headers: Optional[Dict[str, Any]],
+        provider_config: BaseAudioTranscriptionConfig,
+    ) -> Tuple[dict, str, Optional[bytes], Optional[dict]]:
+        """
+        Shared logic for preparing audio transcription requests.
+        Returns: (headers, complete_url, binary_data, json_data)
+        """
         headers = provider_config.validate_environment(
             api_key=api_key,
             headers=headers or {},
@@ -1013,9 +1007,6 @@ def audio_transcriptions(
             litellm_params=litellm_params,
         )
 
-        if client is None or not isinstance(client, HTTPHandler):
-            client = _get_httpx_client()
-
         complete_url = provider_config.get_complete_url(
             api_base=api_base,
             api_key=api_key,
@@ -1049,6 +1040,91 @@ def audio_transcriptions(
             },
         )
 
+        return headers, complete_url, binary_data, json_data
+
+    def _transform_audio_transcription_response(
+        self,
+        provider_config: BaseAudioTranscriptionConfig,
+        model: str,
+        response: httpx.Response,
+        model_response: TranscriptionResponse,
+        logging_obj: LiteLLMLoggingObj,
+        optional_params: dict,
+        api_key: Optional[str],
+    ) -> TranscriptionResponse:
+        """Shared logic for transforming audio transcription responses."""
+        if isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig):
+            return provider_config.transform_audio_transcription_response(
+                model=model,
+                raw_response=response,
+                model_response=model_response,
+                logging_obj=logging_obj,
+                request_data={},
+                optional_params=optional_params,
+                litellm_params={},
+                api_key=api_key,
+            )
+        return model_response
+
+    def audio_transcriptions(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        max_retries: int,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        custom_llm_provider: str,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        atranscription: bool = False,
+        headers: Optional[Dict[str, Any]] = None,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
+    ) -> Union[TranscriptionResponse, Coroutine[Any, Any, TranscriptionResponse]]:
+        if provider_config is None:
+            raise ValueError(
+                f"No provider config found for model: {model} and provider: {custom_llm_provider}"
+            )
+
+        if atranscription is True:
+            return self.async_audio_transcriptions(  # type: ignore
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                model_response=model_response,
+                timeout=timeout,
+                max_retries=max_retries,
+                logging_obj=logging_obj,
+                api_key=api_key,
+                api_base=api_base,
+                custom_llm_provider=custom_llm_provider,
+                client=client,
+                headers=headers,
+                provider_config=provider_config,
+            )
+
+        # Prepare the request
+        headers, complete_url, binary_data, json_data = (
+            self._prepare_audio_transcription_request(
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
+                api_key=api_key,
+                api_base=api_base,
+                headers=headers,
+                provider_config=provider_config,
+            )
+        )
+
+        if client is None or not isinstance(client, HTTPHandler):
+            client = _get_httpx_client()
+
         try:
             # Make the POST request
             response = client.post(
@@ -1061,19 +1137,82 @@ def audio_transcriptions(
         except Exception as e:
             raise self._handle_error(e=e, provider_config=provider_config)
 
-        if isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig):
-            returned_response = provider_config.transform_audio_transcription_response(
+        return self._transform_audio_transcription_response(
+            provider_config=provider_config,
+            model=model,
+            response=response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            api_key=api_key,
+        )
+
+    async def async_audio_transcriptions(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+        model_response: TranscriptionResponse,
+        timeout: float,
+        max_retries: int,
+        logging_obj: LiteLLMLoggingObj,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        custom_llm_provider: str,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+        headers: Optional[Dict[str, Any]] = None,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
+    ) -> TranscriptionResponse:
+        if provider_config is None:
+            raise ValueError(
+                f"No provider config found for model: {model} and provider: {custom_llm_provider}"
+            )
+
+        # Prepare the request
+        headers, complete_url, binary_data, json_data = (
+            self._prepare_audio_transcription_request(
                 model=model,
-                raw_response=response,
-                model_response=model_response,
-                logging_obj=logging_obj,
-                request_data={},
+                audio_file=audio_file,
                 optional_params=optional_params,
-                litellm_params={},
+                litellm_params=litellm_params,
+                logging_obj=logging_obj,
                 api_key=api_key,
+                api_base=api_base,
+                headers=headers,
+                provider_config=provider_config,
             )
-            return returned_response
-        return model_response
+        )
+
+        if client is None or not isinstance(client, AsyncHTTPHandler):
+            async_httpx_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider),
+                params={"ssl_verify": litellm_params.get("ssl_verify", None)},
+            )
+        else:
+            async_httpx_client = client
+
+        try:
+            # Make the async POST request
+            response = await async_httpx_client.post(
+                url=complete_url,
+                headers=headers,
+                content=binary_data,
+                json=json_data,
+                timeout=timeout,
+            )
+        except Exception as e:
+            raise self._handle_error(e=e, provider_config=provider_config)
+
+        return self._transform_audio_transcription_response(
+            provider_config=provider_config,
+            model=model,
+            response=response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            optional_params=optional_params,
+            api_key=api_key,
+        )
 
     async def async_anthropic_messages_handler(
         self,
diff --git a/tests/llm_translation/base_audio_transcription_unit_tests.py b/tests/llm_translation/base_audio_transcription_unit_tests.py
@@ -52,6 +52,22 @@ def test_audio_transcription(self):
 
         assert transcript.text is not None
 
+    @pytest.mark.asyncio
+    async def test_audio_transcription_async(self):
+        """
+        Test that the audio transcription is translated correctly.
+        """
+
+        litellm.set_verbose = True
+        litellm._turn_on_debug()
+        AUDIO_FILE = open(file_path, "rb")
+        transcription_call_args = self.get_base_audio_transcription_call_args()
+        transcript = await litellm.atranscription(**transcription_call_args, file=AUDIO_FILE)
+        print(f"transcript: {transcript.model_dump()}")
+        print(f"transcript hidden params: {transcript._hidden_params}")
+
+        assert transcript.text is not None
+
     def test_audio_transcription_optional_params(self):
         """
         Test that the audio transcription is translated correctly.