ASGI callable returned without completing response. #25615
To resolve the "ASGI callable returned without completing response" error in your Quart REST API running on Azure App Service with LangChain, ensure that your asynchronous functions are properly awaited and that the response is fully completed before returning. This error often occurs when an asynchronous function is not awaited, or when a streaming response is cut off partway through, so the ASGI server sees a response that started but never finished.
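As a minimal sketch of the failure mode (a hypothetical route, not taken from the code below): once a streaming response has started, an exception inside the generator means the response can never be completed, and the ASGI server reports exactly this kind of error.

```python
from quart import Quart, Response

app = Quart(__name__)

@app.route('/stream-bad')
async def stream_bad():
    async def gen():
        yield b"first chunk"        # headers and first bytes are already sent
        raise RuntimeError("boom")  # the declared response can no longer finish
    # The server may log "ASGI callable returned without completing response"
    # because the body ended before the response was complete.
    return Response(gen(), content_type='text/event-stream')
```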
Here is an example of how you might structure your asynchronous function to ensure it completes properly:

```python
from quart import Quart, request, jsonify, Response
from langchain_community.chat_models.azure_openai import AzureChatOpenAI
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory
import json
import logging
import time
import traceback

app = Quart(__name__)

# Initialize your Azure OpenAI client
openai_client = AzureChatOpenAI(
    azure_deployment="your_deployment_name",
    openai_api_version="2023-12-01-preview",
    openai_api_key="your_openai_api_key",
    azure_endpoint="your_azure_endpoint"
)

@app.route('/policy', methods=['POST'])
async def policy():
    data = await request.get_json()
    prompt = data.get("prompt", "What are the company values?")
    chat_history = data.get("chat_history", [])
    username = data.get("username", "")
    inputs = [item.get('input') for item in chat_history][-5:]
    outputs = [item.get('output') for item in chat_history][-5:]

    # NOTE: .split(' ')[1] raises IndexError if a header is missing or not in
    # "Bearer <token>" form; validate these before indexing.
    access_token_keyvault = request.headers.get('Authorization-KeyVault', '').split(' ')[1]
    access_token_openai = request.headers.get('Authorization-OpenAI', '').split(' ')[1]
    access_token_search = request.headers.get('Authorization-Search', '').split(' ')[1]

    # `config`, `log_exception` and `log_custom_metric` are project-specific
    # helpers assumed to be defined elsewhere in the application.
    instrumentation_key = config.SHD_INSTRUMENTATION_KEY(access_token_keyvault)

    async def generate():
        start_time = time.time()
        try:
            async for chunk in policy_gpt_chain_lcel(prompt, username, inputs, outputs, access_token_keyvault, access_token_openai, access_token_search):
                yield chunk
        except Exception as e:
            logging.error(f"An error occurred on app.py: {e} {traceback.format_exc()}")
            log_exception(instrumentation_key, e)
            yield f"An error occurred: {e}".encode()
        finally:
            end_time = time.time()
            execution_time = (end_time - start_time) * 1000  # Convert to milliseconds
            log_custom_metric(instrumentation_key, 'policy_lcel_quart_execution_time', execution_time)
            logging.info("Generator completed")

    return Response(generate(), content_type='text/event-stream')
```
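One configuration worth checking, given that the middleware log below reports the request dying after roughly 60 seconds: Quart enforces a response timeout (the `RESPONSE_TIMEOUT` config key, 60 seconds by default) and abandons streaming responses that outlive it, which can surface as this same ASGI error. A sketch, assuming that timeout is the culprit:

```python
# Raise or disable Quart's default 60-second timeouts app-wide...
app.config["RESPONSE_TIMEOUT"] = None  # seconds, or None to disable
app.config["BODY_TIMEOUT"] = None

# ...or, inside the route, disable it for this one streaming response:
response = Response(generate(), content_type='text/event-stream')
response.timeout = None
return response
```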
The chain itself:

```python
async def policy_gpt_chain_lcel(prompt, username, inputs, outputs, access_token_keyvault, access_token_openai, access_token_search):
    async def llm_thread_gpt_lcel(prompt, username, inputs, outputs):
        try:
            SHD_AZURE_OPENAI_ENDPOINT = config.SHD_AZURE_OPENAI_ENDPOINT(access_token_keyvault)
            SHD_OPENAI_DEPLOYMENT_NAME = config.SHD_OPENAI_DEPLOYMENT_NAME(access_token_keyvault)
            SHD_OPENAI_GPT_MODEL_NAME = config.SHD_OPENAI_GPT_MODEL_NAME(access_token_keyvault)
            llm = AzureChatOpenAI(
                azure_endpoint=SHD_AZURE_OPENAI_ENDPOINT,
                openai_api_version="2023-03-15-preview",
                deployment_name=SHD_OPENAI_DEPLOYMENT_NAME,
                azure_ad_token=access_token_openai,
                openai_api_type="Azure",
                model_name=SHD_OPENAI_GPT_MODEL_NAME,
                streaming=True,
                temperature=0,
                timeout=None,
                max_retries=3
            )
            # `setup_embeddings`, `setup_vector_store`, `ArgenxUserRetrieverPolicyLCEL`
            # and `ANSWER_PROMPT_POLICY` are project-specific.
            embeddings = setup_embeddings(access_token_keyvault, access_token_openai)
            vector_store = setup_vector_store(embeddings, access_token_keyvault, access_token_search)
            retriever = ArgenxUserRetrieverPolicyLCEL(vectorstore=vector_store, username=username)

            # Rebuild the chat history from the last five exchanges.
            demo_ephemeral_chat_history = ChatMessageHistory()
            for i in range(len(inputs)):
                demo_ephemeral_chat_history.add_user_message(inputs[i])
                demo_ephemeral_chat_history.add_ai_message(outputs[i])

            contextualize_q_system_prompt = (
                "Given a chat history and the latest user question "
                "which might reference context in the chat history, "
                "formulate a standalone question which can be understood "
                "without the chat history. Do NOT answer the question, "
                "just reformulate it if needed and otherwise return it as is."
            )
            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input} Please answer in HTML format"),
                ]
            )
            history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", ANSWER_PROMPT_POLICY),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input} Please answer in HTML format"),
                ]
            )
            document_prompt = PromptTemplate(
                input_variables=["page_content", "source"],
                template="{page_content} (Source: {source})"
            )
            question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt, document_variable_name="context") | StrOutputParser()
            rag_chain_from_docs = create_retrieval_chain(history_aware_retriever, question_answer_chain)
            conversational_rag_chain = RunnableWithMessageHistory(
                rag_chain_from_docs,
                lambda session_id: demo_ephemeral_chat_history,
                input_messages_key="input",
                history_messages_key="chat_history",
                output_messages_key="answer",
            )
            async for chunk in accumulate_and_yield(conversational_rag_chain.astream({"input": prompt}, config={"configurable": {"session_id": "unbound"}})):
                yield chunk
        except Exception as e:
            logging.error(f"An error occurred in llm_thread_gpt_lcel: {e}")
            yield f"An error occurred: {e}".encode()

    try:
        async for chunk in llm_thread_gpt_lcel(prompt, username, inputs, outputs):
            yield chunk
    except Exception as e:
        logging.error(f"An error occurred in policy_gpt_chain_lcel: {e}")
        yield f"An error occurred: {e}".encode()
```
```python
async def accumulate_and_yield(chunks):
    try:
        accumulated_answer = ""
        accumulated_context = ""
        in_context = False
        source_detected = False
        sources_content = ""
        partial_sources = ""
        async for chunk in chunks:
            chunk_str = str(chunk)

            # Start of a context payload in the streamed chunk.
            if '{\'context\': [Document(metadata' in chunk_str:
                in_context = True
                start_index = chunk_str.index('{\'context\': [Document(metadata')
                accumulated_context = chunk_str[start_index:]
                if ']}' in chunk_str[start_index:]:
                    end_index = chunk_str.index(']}', start_index) + 2
                    accumulated_context = chunk_str[start_index:end_index]
                    in_context = False
                    json_context = convert_to_json(accumulated_context)  # project-specific helper
                    yield json_context.encode()
                    accumulated_context = ""
                continue

            # Continuation of a context payload split across chunks.
            if in_context:
                accumulated_context += chunk_str
                if ']}' in chunk_str:
                    end_index = chunk_str.index(']}') + 2
                    accumulated_context = accumulated_context[:end_index]
                    in_context = False
                    # NOTE: this branch serializes the raw string with json.dumps,
                    # while the branch above uses convert_to_json; the asymmetry
                    # may be unintentional.
                    json_context = json.dumps(accumulated_context)
                    yield json_context.encode()
                    accumulated_context = ""
                continue

            # Answer fragments, optionally followed by a "SOURCES:" section.
            if "{'answer':" in chunk_str:
                answer_part = parse_json_string(chunk_str)  # project-specific helper
                if answer_part is None:
                    logging.error(f"Failed to parse JSON from chunk: {chunk_str}")
                    continue
                if 'answer' not in answer_part:
                    logging.error(f"'answer' key not found in parsed JSON: {answer_part}")
                    continue
                answer_part = answer_part['answer']
                partial_sources += answer_part
                if 'SOURCES:' in partial_sources:
                    source_detected = True
                    parts = partial_sources.split('SOURCES:', 1)
                    accumulated_answer += parts[0]
                    sources_content = parts[1]
                    partial_sources = ""
                else:
                    if source_detected:
                        sources_content += answer_part
                    else:
                        accumulated_answer += answer_part
                # Flush the answer in small batches to keep the stream responsive.
                if len(accumulated_answer) >= 8 and not source_detected:
                    try:
                        json_answer = json.dumps({'answer': accumulated_answer})
                        yield json_answer.encode()
                        accumulated_answer = ""
                    except (TypeError, ValueError) as e:
                        logging.error(f"Failed to process chunk: {chunk_str}")
                        logging.exception(e)
                        continue

        # Flush whatever remains after the stream ends.
        if accumulated_answer and not source_detected:
            try:
                json_answer = json.dumps({'answer': accumulated_answer})
                yield json_answer.encode()
            except (TypeError, ValueError) as e:
                logging.error(f"Failed to process final accumulated answer: {accumulated_answer}")
                logging.exception(e)
        if source_detected:
            try:
                json_sources = json.dumps({'SOURCES': sources_content})
                yield json_sources.encode()
            except (TypeError, ValueError) as e:
                logging.error(f"Failed to process sources content: {sources_content}")
                logging.exception(e)
    except Exception as e:
        logging.error(f"An error occurred in accumulate_and_yield: {e} {traceback.format_exc()}")
        yield f"An error occurred: {e} {traceback.format_exc()} ".encode()
    finally:
        yield b""
```

Ensure that all asynchronous calls are properly awaited and that the response is fully completed before returning. This should help resolve the "ASGI callable returned without completing response" error. If the issue persists, it might be related to specific configurations or network issues with Azure OpenAI, which would require further investigation [1][2][3][4][5].
Description
This is a Quart REST API running on Azure App Service with LangChain, and we are getting this error:
```
2024-08-19T09:35:25.738554889Z [2024-08-19 09:35:25 +0000] [90] [ERROR] ASGI callable returned without completing response.
2024-08-19T09:35:25.745152178Z fail: Middleware[0]
2024-08-19T09:35:25.745193679Z Failed to forward request to http://169.254.129.7:8000. Encountered a System.Net.Http.HttpIOException exception after 60115.444ms with message: The response ended prematurely. (ResponseEnded). Check application logs to verify the application is properly handling HTTP traffic.
2024-08-19T09:35:25.801323034Z fail: Microsoft.AspNetCore.Server.Kestrel[13]
2024-08-19T09:35:25.801365435Z Connection id "0HN605PIS1CH1", Request id "0HN605PIS1CH1:00000013": An unhandled exception was thrown by the application.
2024-08-19T09:35:25.801373535Z System.InvalidOperationException: StatusCode cannot be set because the response has already started.
2024-08-19T09:35:25.801379235Z at Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http.HttpProtocol.ThrowResponseAlreadyStartedException(String value)
2024-08-19T09:35:25.801386335Z at Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http.HttpProtocol.Microsoft.AspNetCore.Http.Features.IHttpResponseFeature.set_StatusCode(Int32 value)
2024-08-19T09:35:25.801393535Z at Microsoft.Azure.AppService.Middleware.Forwarding.RequestForwarder.OnRequest(HttpContext context) in /__w/1/s/src/EasyAuth/Middleware.Forwarding/RequestForwarder.cs:line 90
2024-08-19T09:35:25.801399535Z at Microsoft.Azure.AppService.Middleware.NetCore.AppServiceMiddleware.InvokeAsync(HttpContext context) in /__w/1/s/src/EasyAuth/Microsoft.Azure.AppService.Middleware.NetCore/AppServiceMiddleware.cs:line 153
2024-08-19T09:35:25.801404936Z at Microsoft.Azure.AppService.MiddlewareShim.AutoHealing.AutoHealingMiddleware.Invoke(HttpContext context) in /__w/1/s/src/EasyAuth/Middleware.Host/AutoHealing/AutoHealingMiddleware.cs:line 54
2024-08-19T09:35:25.801411136Z at Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Http.HttpProtocol.ProcessRequests[TContext](IHttpApplication`1 application)
```
We are wondering whether this error ("ASGI callable returned without completing response.") comes from our AzureOpenAI connection or from the LangChain chains, and if so, how we can control it or find out the exact error.