Skip to content

[Feat] Perf fix - ensure deepgram provider uses async httpx calls #11641

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion litellm/llms/custom_httpx/http_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ async def post(
stream: bool = False,
logging_obj: Optional[LiteLLMLoggingObject] = None,
files: Optional[RequestFiles] = None,
content: Any = None,
):
start_time = time.time()
try:
Expand All @@ -227,6 +228,7 @@ async def post(
headers=headers,
timeout=timeout,
files=files,
content=content,
)
response = await self.client.send(req, stream=stream)
response.raise_for_status()
Expand Down Expand Up @@ -452,14 +454,15 @@ async def single_connection_post_request(
params: Optional[dict] = None,
headers: Optional[dict] = None,
stream: bool = False,
content: Any = None,
):
"""
Making POST request for a single connection client.
Used for retrying connection client errors.
"""
req = client.build_request(
"POST", url, data=data, json=json, params=params, headers=headers # type: ignore
"POST", url, data=data, json=json, params=params, headers=headers, content=content # type: ignore
)
response = await client.send(req, stream=stream)
response.raise_for_status()
Expand Down
191 changes: 165 additions & 26 deletions litellm/llms/custom_httpx/llm_http_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,28 +982,22 @@ async def arerank(
request_data=request_data,
)

def audio_transcriptions(
def _prepare_audio_transcription_request(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
litellm_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str],
api_base: Optional[str],
custom_llm_provider: str,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
atranscription: bool = False,
headers: Optional[Dict[str, Any]] = None,
provider_config: Optional[BaseAudioTranscriptionConfig] = None,
) -> TranscriptionResponse:
if provider_config is None:
raise ValueError(
f"No provider config found for model: {model} and provider: {custom_llm_provider}"
)
headers: Optional[Dict[str, Any]],
provider_config: BaseAudioTranscriptionConfig,
) -> Tuple[dict, str, Optional[bytes], Optional[dict]]:
"""
Shared logic for preparing audio transcription requests.
Returns: (headers, complete_url, binary_data, json_data)
"""
headers = provider_config.validate_environment(
api_key=api_key,
headers=headers or {},
Expand All @@ -1013,9 +1007,6 @@ def audio_transcriptions(
litellm_params=litellm_params,
)

if client is None or not isinstance(client, HTTPHandler):
client = _get_httpx_client()

complete_url = provider_config.get_complete_url(
api_base=api_base,
api_key=api_key,
Expand Down Expand Up @@ -1049,6 +1040,91 @@ def audio_transcriptions(
},
)

return headers, complete_url, binary_data, json_data

def _transform_audio_transcription_response(
    self,
    provider_config: BaseAudioTranscriptionConfig,
    model: str,
    response: httpx.Response,
    model_response: TranscriptionResponse,
    logging_obj: LiteLLMLoggingObj,
    optional_params: dict,
    api_key: Optional[str],
) -> TranscriptionResponse:
    """
    Turn a raw provider HTTP response into a populated TranscriptionResponse.

    Shared by the sync and async transcription paths. Only Deepgram currently
    ships a custom response transform; for every other provider the
    `model_response` object is handed back untouched.
    """
    # Non-Deepgram providers: nothing to transform, return as-is.
    if not isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig):
        return model_response

    return provider_config.transform_audio_transcription_response(
        model=model,
        raw_response=response,
        model_response=model_response,
        logging_obj=logging_obj,
        request_data={},
        optional_params=optional_params,
        litellm_params={},
        api_key=api_key,
    )

def audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
litellm_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str],
api_base: Optional[str],
custom_llm_provider: str,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
atranscription: bool = False,
headers: Optional[Dict[str, Any]] = None,
provider_config: Optional[BaseAudioTranscriptionConfig] = None,
) -> Union[TranscriptionResponse, Coroutine[Any, Any, TranscriptionResponse]]:
if provider_config is None:
raise ValueError(
f"No provider config found for model: {model} and provider: {custom_llm_provider}"
)

if atranscription is True:
return self.async_audio_transcriptions( # type: ignore
model=model,
audio_file=audio_file,
optional_params=optional_params,
litellm_params=litellm_params,
model_response=model_response,
timeout=timeout,
max_retries=max_retries,
logging_obj=logging_obj,
api_key=api_key,
api_base=api_base,
custom_llm_provider=custom_llm_provider,
client=client,
headers=headers,
provider_config=provider_config,
)

# Prepare the request
headers, complete_url, binary_data, json_data = (
self._prepare_audio_transcription_request(
model=model,
audio_file=audio_file,
optional_params=optional_params,
litellm_params=litellm_params,
logging_obj=logging_obj,
api_key=api_key,
api_base=api_base,
headers=headers,
provider_config=provider_config,
)
)

if client is None or not isinstance(client, HTTPHandler):
client = _get_httpx_client()

try:
# Make the POST request
response = client.post(
Expand All @@ -1061,19 +1137,82 @@ def audio_transcriptions(
except Exception as e:
raise self._handle_error(e=e, provider_config=provider_config)

if isinstance(provider_config, litellm.DeepgramAudioTranscriptionConfig):
returned_response = provider_config.transform_audio_transcription_response(
return self._transform_audio_transcription_response(
provider_config=provider_config,
model=model,
response=response,
model_response=model_response,
logging_obj=logging_obj,
optional_params=optional_params,
api_key=api_key,
)

async def async_audio_transcriptions(
self,
model: str,
audio_file: FileTypes,
optional_params: dict,
litellm_params: dict,
model_response: TranscriptionResponse,
timeout: float,
max_retries: int,
logging_obj: LiteLLMLoggingObj,
api_key: Optional[str],
api_base: Optional[str],
custom_llm_provider: str,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
headers: Optional[Dict[str, Any]] = None,
provider_config: Optional[BaseAudioTranscriptionConfig] = None,
) -> TranscriptionResponse:
if provider_config is None:
raise ValueError(
f"No provider config found for model: {model} and provider: {custom_llm_provider}"
)

# Prepare the request
headers, complete_url, binary_data, json_data = (
self._prepare_audio_transcription_request(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
request_data={},
audio_file=audio_file,
optional_params=optional_params,
litellm_params={},
litellm_params=litellm_params,
logging_obj=logging_obj,
api_key=api_key,
api_base=api_base,
headers=headers,
provider_config=provider_config,
)
return returned_response
return model_response
)

if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client

try:
# Make the async POST request
response = await async_httpx_client.post(
url=complete_url,
headers=headers,
content=binary_data,
json=json_data,
timeout=timeout,
)
except Exception as e:
raise self._handle_error(e=e, provider_config=provider_config)

return self._transform_audio_transcription_response(
provider_config=provider_config,
model=model,
response=response,
model_response=model_response,
logging_obj=logging_obj,
optional_params=optional_params,
api_key=api_key,
)

async def async_anthropic_messages_handler(
self,
Expand Down
16 changes: 16 additions & 0 deletions tests/llm_translation/base_audio_transcription_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ def test_audio_transcription(self):

assert transcript.text is not None

@pytest.mark.asyncio
async def test_audio_transcription_async(self):
    """
    Test that audio transcription is translated correctly through the async
    (`litellm.atranscription`) code path.
    """
    litellm.set_verbose = True
    litellm._turn_on_debug()
    transcription_call_args = self.get_base_audio_transcription_call_args()
    # Open the audio fixture in a context manager so the file handle is
    # closed even if the transcription call raises (the original leaked it).
    with open(file_path, "rb") as audio_file:
        transcript = await litellm.atranscription(
            **transcription_call_args, file=audio_file
        )
    print(f"transcript: {transcript.model_dump()}")
    print(f"transcript hidden params: {transcript._hidden_params}")

    assert transcript.text is not None

def test_audio_transcription_optional_params(self):
"""
Test that the audio transcription is translated correctly.
Expand Down
Loading