Description
Operating System
Windows
Version Information
There are many logs reporting 500s coming from the following two URLs:
https://meta-llama-3-1-405b-instruct-czz.eastus2.models.ai.azure.com/chat/completions
https://cohere-command-r-plus-uiawv.eastus2.models.ai.azure.com/chat/completions
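For reference, the 500s can also be checked outside LangChain with a direct REST call against one of the serverless endpoints (a minimal sketch; the payload shape assumes the serverless chat/completions schema, and <endpoint-key> is a placeholder, not a real key):

import requests

# Minimal direct check against one of the failing serverless endpoints,
# bypassing LangChain entirely, to see whether the 500 reproduces at the
# HTTP level and what error detail the response body carries.
url = "https://cohere-command-r-plus-uiawv.eastus2.models.ai.azure.com/chat/completions"
headers = {
    "Authorization": "Bearer <endpoint-key>",  # placeholder, not a real key
    "Content-Type": "application/json",
}
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Say hello"},
    ],
    "max_tokens": 64,
}
resp = requests.post(url, headers=headers, json=payload)
print(resp.status_code)
print(resp.text)  # server-side error detail, if any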
Code snippet:

from langchain.chains import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import SystemMessage
from langchain_community.chat_models.azureml_endpoint import (
    AzureMLChatOnlineEndpoint,
    AzureMLEndpointApiType,
    CustomOpenAIChatContentFormatter,  # updated formatter
)

token = get_token()  # user-defined helper that returns the endpoint API key

# Previously tried: "https://apimdevcloudeng.azure-api.net/mlstudio/chat/completions"
chat_model = AzureMLChatOnlineEndpoint(
    # endpoint_url="https://Cohere-command-r-plus-uiawv.eastus2.models.ai.azure.com/chat/completions",
    endpoint_url="https://apimdevcloudeng.azure-api.net/v1/chat/completions",
    endpoint_api_type=AzureMLEndpointApiType.serverless,
    endpoint_api_key=token,
    content_formatter=CustomOpenAIChatContentFormatter(),
    model_kwargs={"model": "mist"},
    # params={"model": "mist"}  # also tried passing the model via params
)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("user", "Question: {question}"),
])

chat_llm_chain = LLMChain(
    llm=chat_model,
    prompt=prompt,
    verbose=True,
)

output_parser = StrOutputParser()
chain = prompt | chat_model | output_parser

question = "What are the differences between Azure Machine Learning and Azure AI services?"
response = chain.invoke({"question": question})
print(response)
GitHub repo link:
How can I consolidate three models behind one serverless endpoint and route calls to each of the three models?
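What I am trying to achieve looks roughly like this (a minimal sketch, assuming a hypothetical APIM route that dispatches on the "model" value sent through model_kwargs; the model names below are placeholders):

from langchain_community.chat_models.azureml_endpoint import (
    AzureMLChatOnlineEndpoint,
    AzureMLEndpointApiType,
    CustomOpenAIChatContentFormatter,
)

GATEWAY_URL = "https://apimdevcloudeng.azure-api.net/v1/chat/completions"  # single APIM front door

def make_chat_model(model_name: str) -> AzureMLChatOnlineEndpoint:
    # One client per model, all pointing at the same gateway URL; the gateway
    # is assumed to route on the "model" field to the right serverless endpoint.
    return AzureMLChatOnlineEndpoint(
        endpoint_url=GATEWAY_URL,
        endpoint_api_type=AzureMLEndpointApiType.serverless,
        endpoint_api_key=get_token(),  # user-defined helper, as above
        content_formatter=CustomOpenAIChatContentFormatter(),
        model_kwargs={"model": model_name},
    )

# Placeholder model identifiers; the real values depend on the APIM routing rules.
models = {name: make_chat_model(name) for name in ("llama", "cohere", "mist")}

With this shape, all three models would sit behind one gateway URL and the caller would pick the model per request.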
Steps to reproduce
Run the same code snippet as shown under Description above.
Expected behavior
Return completion results.
Actual behavior
500 errors
Additional information
No response