Skip to content

Users/singankit/tool call accuracy improvements #41897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,22 @@
as found under the "Name" column in the "Connected Resources" tab in your Azure AI Foundry project.
"""

import json
import os
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.agents.models import AzureAISearchQueryType, AzureAISearchTool, ListSortOrder, MessageRole
from dotenv import load_dotenv
from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration

load_dotenv()

# Azure OpenAI model configuration handed to the evaluator(s) below.
# Every value is read with os.environ[...], so a missing variable raises KeyError at import time.
model_config = AzureOpenAIModelConfiguration(
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
)

project_client = AIProjectClient(
endpoint=os.environ["PROJECT_ENDPOINT"],
Expand All @@ -51,7 +63,7 @@

# Initialize agent AI search tool and add the search index connection id
ai_search = AzureAISearchTool(
index_connection_id=conn_id, index_name="sample_index", query_type=AzureAISearchQueryType.SIMPLE, top_k=3, filter=""
index_connection_id=conn_id, index_name="contoso-manuals-index", query_type=AzureAISearchQueryType.SIMPLE, top_k=3, filter=""
)

# Create agent with AI search tool and process agent run
Expand All @@ -66,7 +78,7 @@
agent = agents_client.create_agent(
model=os.environ["MODEL_DEPLOYMENT_NAME"],
name="my-agent",
instructions="You are a helpful agent",
instructions="Hello, you are helpful agent and can search information from search index using Azure AI Search tool provided. Use the tool to answer questions about Contoso products.",
tools=ai_search.definitions,
tool_resources=ai_search.resources,
)
Expand All @@ -81,7 +93,7 @@
message = agents_client.messages.create(
thread_id=thread.id,
role="user",
content="What is the temperature rating of the cozynights sleeping bag?",
content="What contso tent do you recommend for hiking ?",
)
print(f"Created message, ID: {message.id}")

Expand All @@ -108,7 +120,7 @@
azure_ai_search_details = call.get("azure_ai_search", {})
if azure_ai_search_details:
print(f" azure_ai_search input: {azure_ai_search_details.get('input')}")
print(f" azure_ai_search output: {azure_ai_search_details.get('output')}")
print(f" azure_ai_search output: {json.dumps(azure_ai_search_details.get('output'), indent=2)}")
print() # add an extra newline between steps

# Delete the agent when done
Expand All @@ -133,3 +145,12 @@
for message_text in message.text_messages:
print(f"{message.role}: {message_text.text.value}")
# [END populate_references_agent_with_azure_ai_search_tool]

# Evaluate the run using the converter.
# The converter is built from the project client and asked for the data of this thread/run pair.
converter = AIAgentConverter(project_client)
converted_output = converter.convert(thread_id=thread.id, run_id=run.id)
# Dump the converted payload so the evaluator input can be inspected.
print(converted_output)

# The converted output is unpacked as keyword arguments into the evaluator call.
tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
response = tool_call_accuracy(**converted_output)
# The evaluator result is serialized with json.dumps for display.
print(f"Tool call accuracy: {json.dumps(response, indent=4)}")
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@
/subscriptions/{subscription-id}/resourceGroups/{resource-group-name}/providers/Microsoft.MachineLearningServices/workspaces/{workspace-name}/connections/{connection-name}
"""

import json
import os
from azure.ai.projects import AIProjectClient
from azure.ai.agents.models import MessageRole, BingGroundingTool
from azure.identity import DefaultAzureCredential

from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration
from dotenv import load_dotenv
load_dotenv()

# Azure OpenAI model configuration handed to the evaluator below.
# Every value is read with os.environ[...], so a missing variable raises KeyError at import time.
model_config = AzureOpenAIModelConfiguration(
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
)

project_client = AIProjectClient(
endpoint=os.environ["PROJECT_ENDPOINT"],
Expand Down Expand Up @@ -90,6 +100,7 @@
bing_grounding_details = call.get("bing_grounding", {})
if bing_grounding_details:
print(f" Bing Grounding ID: {bing_grounding_details.get('requesturl')}")
print(f" Bing Grounding Full response: {bing_grounding_details}")

print() # add an extra newline between steps

Expand All @@ -104,3 +115,11 @@
print(f"Agent response: {text_message.text.value}")
for annotation in response_message.url_citation_annotations:
print(f"URL Citation: [{annotation.url_citation.title}]({annotation.url_citation.url})")

# Evaluate the run using the converter.
# The converter is built from the project client and asked for the data of this thread/run pair.
converter = AIAgentConverter(project_client)
converted_output = converter.convert(thread_id=thread.id, run_id=run.id)

# The converted output is unpacked as keyword arguments into the evaluator call.
tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
response = tool_call_accuracy(**converted_output)
# The evaluator result is serialized with json.dumps for display.
print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,29 @@
the "Models + endpoints" tab in your Azure AI Foundry project.
"""

import json
import os
from azure.ai.projects import AIProjectClient
from azure.ai.agents.models import CodeInterpreterTool
from azure.ai.agents.models import FilePurpose, MessageRole
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

asset_file_path = os.path.abspath(
os.path.join(os.path.dirname(__file__), "../assets/synthetic_500_quarterly_results.csv")
)

# Azure OpenAI model configuration handed to the evaluator below.
# Every value is read with os.environ[...], so a missing variable raises KeyError at import time.
model_config = AzureOpenAIModelConfiguration(
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
)


project_client = AIProjectClient(
endpoint=os.environ["PROJECT_ENDPOINT"],
credential=DefaultAzureCredential(),
Expand Down Expand Up @@ -81,6 +93,23 @@
agents_client.files.delete(file.id)
print("Deleted file")

# Fetch run steps to get the details of the agent run and print every tool call
# (id, type, and the code interpreter input/outputs when present).
run_steps = agents_client.run_steps.list(thread_id=thread.id, run_id=run.id)
for step in run_steps:
    print(f"Step {step['id']} status: {step['status']}")
    step_details = step.get("step_details", {})
    tool_calls = step_details.get("tool_calls", [])

    if tool_calls:
        print(" Tool calls:")
        for call in tool_calls:
            print(f" Tool Call ID: {call.get('id')}")
            print(f" Type: {call.get('type')}")
            # Look the details up once. Keeping the lookup out of the f-string also
            # avoids reusing double quotes inside a double-quoted f-string, which is
            # a SyntaxError on Python versions before 3.12.
            code_interpreter_details = call.get("code_interpreter")
            tool_input = code_interpreter_details.input if code_interpreter_details else "N/A"
            tool_output = code_interpreter_details.outputs if code_interpreter_details else "N/A"
            print(f" Input: {tool_input}")
            print(f" Output: {tool_output}")
            print("***********Tool Call End***********")


# [START get_messages_and_save_files]
messages = agents_client.messages.list(thread_id=thread.id)
print(f"Messages: {messages}")
Expand Down Expand Up @@ -109,3 +138,11 @@

agents_client.delete_agent(agent.id)
print("Deleted agent")

# Evaluate the run using the converter.
# NOTE(review): this section runs after the agent deletion just above ("Deleted agent");
# confirm the converter can still read the thread/run data at this point.
converter = AIAgentConverter(project_client)
converted_output = converter.convert(thread_id=thread.id, run_id=run.id)

# The converted output is unpacked as keyword arguments into the evaluator call.
tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
response = tool_call_accuracy(**converted_output)
# The evaluator result is serialized with json.dumps for display.
print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,28 @@
the "Models + endpoints" tab in your Azure AI Foundry project.
"""

import json
import os
from azure.ai.projects import AIProjectClient
from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration, GroundednessEvaluator
from azure.ai.agents.models import (
FileSearchTool,
FilePurpose,
ListSortOrder,
)
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
load_dotenv()

asset_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../assets/product_info_1.md"))

# Azure OpenAI model configuration shared by both evaluators below.
# Every value is read with os.environ[...], so a missing variable raises KeyError at import time.
model_config = AzureOpenAIModelConfiguration(
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_key=os.environ["AZURE_OPENAI_API_KEY"],
api_version=os.environ["AZURE_OPENAI_API_VERSION"],
azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
)

project_client = AIProjectClient(
endpoint=os.environ["PROJECT_ENDPOINT"],
credential=DefaultAzureCredential(),
Expand Down Expand Up @@ -76,6 +87,28 @@
# Create and process agent run in thread with tools
run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
print(f"Run finished with status: {run.status}")

# List the run steps, asking the service to include the file search result content,
# then print each tool call together with its ranking options and search results.
run_steps = agents_client.run_steps.list(
    thread_id=thread.id,
    run_id=run.id,
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)
for step in run_steps:
    print(f"Step {step['id']} status: {step['status']}")
    step_details = step.get("step_details", {})
    tool_calls = step_details.get("tool_calls", [])

    if tool_calls:
        print(" Tool calls:")
        for call in tool_calls:
            print(f" Tool Call ID: {call.get('id')}")
            print(f" Type: {call.get('type')}")

            file_search_details = call.get("file_search", {})
            if file_search_details:
                # ranking_options may be absent; guard it so a missing value does not
                # abort the whole dump with an AttributeError on None.
                ranking_options = file_search_details.get("ranking_options")
                if ranking_options is not None:
                    ranking_dump = json.dumps(ranking_options.as_dict(), indent=6)
                else:
                    ranking_dump = "N/A"
                print(f" file_search ranking_options/inputs: {ranking_dump}")
                print(" file_search results/outputs:")
                results = file_search_details.get("results", []) or []
                for i, result in enumerate(results, 1):
                    print(f" Result {i}: {json.dumps(result.as_dict(), indent=8)}")
                    print()  # add line after each result
    print()  # add an extra newline between steps

if run.status == "failed":
# Check if you got "Rate limit is exceeded.", then you want to get more quota
Expand All @@ -84,6 +117,7 @@
# [START teardown]
# Delete the file when done
agents_client.vector_stores.delete(vector_store.id)
print("vector store id " + vector_store.id)
print("Deleted vector store")

agents_client.files.delete(file_id=file.id)
Expand All @@ -102,3 +136,16 @@
if msg.text_messages:
last_text = msg.text_messages[-1]
print(f"{msg.role}: {last_text.text.value}")


# Evaluate the run using the converter.
# The converter is built from the project client and asked for the data of this thread/run pair.
converter = AIAgentConverter(project_client)
converted_output = converter.convert(thread_id=thread.id, run_id=run.id)

# The converted output is unpacked as keyword arguments into the evaluator call.
tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
response = tool_call_accuracy(**converted_output)
print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")

# Second evaluation: groundedness, using the "query" and "response" entries of the converted output.
# NOTE(review): the converted "response" is passed as `context` and no `response` argument is
# supplied — confirm this matches the inputs GroundednessEvaluator expects.
groundedness_evaluator = GroundednessEvaluator(model_config=model_config)
groundedness_response = groundedness_evaluator(query=converted_output["query"], context=converted_output["response"])
print(f"Groundedness Response: {json.dumps(groundedness_response, indent=4)}")
Original file line number Diff line number Diff line change
Expand Up @@ -242,17 +242,32 @@ def _extract_typed_messages(ai_services_messages) -> List[Message]:
# crash on one of the historical messages, let's check for it and bail out from this iteration.
if len(single_turn.content) < 1:
continue

# Build the content of the text message.
content = {
"type": "text",
"text": single_turn.content[0].text.value,
}

content_list = []
# If content is a list, process all content items.
for content_item in single_turn.content:
if content_item.type == "text":
content_list.append({
"type": "text",
"text": content_item.text.value,
})
elif content_item.type == "image":
content_list.append({
"type": "image",
"image": {
"file_id": content_item.image_file.file_id,
}
})
# # Build the content of the text message.
# content = {
# "type": "text",
# "text": single_turn.content[0].text.value,
# }

# If we have a user message, then we save it as such and since it's a human message, there is no
# run_id associated with it.
if single_turn.role == _USER:
final_messages.append(UserMessage(content=[content], createdAt=single_turn.created_at))
final_messages.append(UserMessage(content=content_list, createdAt=single_turn.created_at))
continue

# In this case, we have an assistant message. Unfortunately, this would only have the user-facing
Expand All @@ -261,7 +276,7 @@ def _extract_typed_messages(ai_services_messages) -> List[Message]:
if single_turn.role == _AGENT:
# We are required to put the run_id in the assistant message.
final_messages.append(
AssistantMessage(content=[content], run_id=single_turn.run_id, createdAt=single_turn.created_at)
AssistantMessage(content=content_list, run_id=single_turn.run_id, createdAt=single_turn.created_at)
)
continue

Expand Down Expand Up @@ -788,6 +803,7 @@ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
limit=self._AI_SERVICES_API_MAX_LIMIT,
order="asc",
after=after,
include=["step_details.tool_calls[*].file_search.results[*].content"]
)
has_more = run_steps.has_more
after = run_steps.last_id
Expand Down Expand Up @@ -837,7 +853,8 @@ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
thread_id=thread_id,
run_id=run_id,
limit=self._AI_SERVICES_API_MAX_LIMIT,
order="asc"
order="asc",
include=["step_details.tool_calls[*].file_search.results[*].content"]
)

def _list_run_ids_chronological(self, thread_id: str) -> List[str]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
+ "generate code, and create graphs and charts using your data. Supports "
+ "up to 20 files.",
_BING_GROUNDING: "Enhance model output with web data.",
_FILE_SEARCH: "Search for data across uploaded files.",
_AZURE_AI_SEARCH: "Search an Azure AI Search index for relevant data.",
_FILE_SEARCH: "Lets agents access user-uploaded files (PDFs, Word, Excel, etc.) for information retrieval. Grounding responses in these files ensures answers are personalized and accurate.",
_AZURE_AI_SEARCH: "Enables agents to retrieve and ground responses in enterprise data indexed in Azure AI Search. This allows agents to provide accurate, context-aware answers based on internal knowledge bases.",
_FABRIC_DATAAGENT: "Connect to Microsoft Fabric data agents to retrieve data across different data sources.",
}

Expand Down Expand Up @@ -284,17 +284,18 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
# Try to retrieve it, but if we don't find anything, skip adding the message
# Just manually converting to dicts for easy serialization for now rather than custom serializers
if tool_call.details.type == _CODE_INTERPRETER:
output = tool_call.details.code_interpreter.outputs
output = [result.as_dict() for result in tool_call.details.code_interpreter.outputs]
elif tool_call.details.type == _BING_GROUNDING:
return messages # not supported yet from bing grounding tool
elif tool_call.details.type == _FILE_SEARCH:
output = [
{
"file_id": result.file_id,
"file_name": result.file_name,
"score": result.score,
"content": result.content,
}
# {
# "file_id": result.file_id,
# "file_name": result.file_name,
# "score": result.score,
# "content": result.content,
# }
result.as_dict()
for result in tool_call.details.file_search.results
]
elif tool_call.details.type == _AZURE_AI_SEARCH:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def _is_applicable_tool(self, eval_input):
if tool_definition is None or len(tool_definition) != 1:
return False
tool_type = tool_definition[0].get("type")
if tool_type is None or tool_type != "function":
if tool_type is None or tool_type != "function" and tool_type not in ["file_search", "azure_ai_search", "bing_grounding", "code_interpreter"]:
return False
return True

Expand Down
Loading