Azure · singankit · Jul 1, 2025 · Jul 3, 2025 · Jul 24, 2025
@@ -34,10 +34,22 @@
        as found under the "Name" column in the "Connected Resources" tab in your Azure AI Foundry project.
 """
 
+import json
 import os
 from azure.ai.projects import AIProjectClient
 from azure.identity import DefaultAzureCredential
 from azure.ai.agents.models import AzureAISearchQueryType, AzureAISearchTool, ListSortOrder, MessageRole
+from dotenv import load_dotenv
+from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration
+
+load_dotenv()
+
+model_config = AzureOpenAIModelConfiguration(
+    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+    api_key=os.environ["AZURE_OPENAI_API_KEY"],
+    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
+)
 
 project_client = AIProjectClient(
     endpoint=os.environ["PROJECT_ENDPOINT"],
@@ -51,7 +63,7 @@
 
 # Initialize agent AI search tool and add the search index connection id
 ai_search = AzureAISearchTool(
-    index_connection_id=conn_id, index_name="sample_index", query_type=AzureAISearchQueryType.SIMPLE, top_k=3, filter=""
+    index_connection_id=conn_id, index_name="contoso-manuals-index", query_type=AzureAISearchQueryType.SIMPLE, top_k=3, filter=""
 )
 
 # Create agent with AI search tool and process agent run
@@ -66,7 +78,7 @@
     agent = agents_client.create_agent(
         model=os.environ["MODEL_DEPLOYMENT_NAME"],
         name="my-agent",
-        instructions="You are a helpful agent",
+        instructions="Hello, you are helpful agent and can search information from search index using Azure AI Search tool provided. Use the tool to answer questions about Contoso products.",
         tools=ai_search.definitions,
         tool_resources=ai_search.resources,
     )
@@ -81,7 +93,7 @@
     message = agents_client.messages.create(
         thread_id=thread.id,
         role="user",
-        content="What is the temperature rating of the cozynights sleeping bag?",
+        content="What Contoso tent do you recommend for hiking?",
     )
     print(f"Created message, ID: {message.id}")
 
@@ -108,7 +120,7 @@
                 azure_ai_search_details = call.get("azure_ai_search", {})
                 if azure_ai_search_details:
                     print(f"    azure_ai_search input: {azure_ai_search_details.get('input')}")
-                    print(f"    azure_ai_search output: {azure_ai_search_details.get('output')}")
+                    print(f"    azure_ai_search output: {json.dumps(azure_ai_search_details.get('output'), indent=2)}")
         print()  # add an extra newline between steps
 
     # Delete the agent when done
@@ -133,3 +145,12 @@
             for message_text in message.text_messages:
                 print(f"{message.role}: {message_text.text.value}")
     # [END populate_references_agent_with_azure_ai_search_tool]
+
+    # Evaluate the run using the converter
+    converter = AIAgentConverter(project_client)
+    converted_output = converter.convert(thread_id=thread.id, run_id=run.id)
+    print(converted_output)
+
+    tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
+    response = tool_call_accuracy(**converted_output)
+    print(f"Tool call accuracy: {json.dumps(response, indent=4)}")
@@ -25,11 +25,21 @@
        /subscriptions/{subscription-id}/resourceGroups/{resource-group-name}/providers/Microsoft.MachineLearningServices/workspaces/{workspace-name}/connections/{connection-name}
 """
 
+import json
 import os
 from azure.ai.projects import AIProjectClient
 from azure.ai.agents.models import MessageRole, BingGroundingTool
 from azure.identity import DefaultAzureCredential
-
+from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration
+from dotenv import load_dotenv
+load_dotenv()
+
+model_config = AzureOpenAIModelConfiguration(
+    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+    api_key=os.environ["AZURE_OPENAI_API_KEY"],
+    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
+)
 
 project_client = AIProjectClient(
     endpoint=os.environ["PROJECT_ENDPOINT"],
@@ -90,6 +100,7 @@
                 bing_grounding_details = call.get("bing_grounding", {})
                 if bing_grounding_details:
                     print(f"    Bing Grounding ID: {bing_grounding_details.get('requesturl')}")
+                    print(f"    Bing Grounding Full response: {bing_grounding_details}")
 
         print()  # add an extra newline between steps
 
@@ -104,3 +115,11 @@
             print(f"Agent response: {text_message.text.value}")
         for annotation in response_message.url_citation_annotations:
             print(f"URL Citation: [{annotation.url_citation.title}]({annotation.url_citation.url})")
+
+    # Evaluate the run using the converter
+    converter = AIAgentConverter(project_client)
+    converted_output = converter.convert(thread_id=thread.id, run_id=run.id)
+
+    tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
+    response = tool_call_accuracy(**converted_output)
+    print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")
@@ -23,17 +23,29 @@
        the "Models + endpoints" tab in your Azure AI Foundry project.
 """
 
+import json
 import os
 from azure.ai.projects import AIProjectClient
 from azure.ai.agents.models import CodeInterpreterTool
 from azure.ai.agents.models import FilePurpose, MessageRole
 from azure.identity import DefaultAzureCredential
+from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration
 from pathlib import Path
+from dotenv import load_dotenv
+load_dotenv()
 
 asset_file_path = os.path.abspath(
     os.path.join(os.path.dirname(__file__), "../assets/synthetic_500_quarterly_results.csv")
 )
 
+model_config = AzureOpenAIModelConfiguration(
+    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+    api_key=os.environ["AZURE_OPENAI_API_KEY"],
+    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
+)
+
+
 project_client = AIProjectClient(
     endpoint=os.environ["PROJECT_ENDPOINT"],
     credential=DefaultAzureCredential(),
@@ -81,6 +93,23 @@
     agents_client.files.delete(file.id)
     print("Deleted file")
 
+    # Fetch run steps to get the details of the agent run
+    run_steps = agents_client.run_steps.list(thread_id=thread.id, run_id=run.id)
+    for step in run_steps:
+        print(f"Step {step['id']} status: {step['status']}")
+        step_details = step.get("step_details", {})
+        tool_calls = step_details.get("tool_calls", [])
+
+        if tool_calls:
+            print("  Tool calls:")
+            for call in tool_calls:
+                print(f"    Tool Call ID: {call.get('id')}")
+                print(f"    Type: {call.get('type')}")
+                print(f"    Input: {call.get('code_interpreter').input if call.get('code_interpreter') else 'N/A'}")
+                print(f"    Output: {call.get('code_interpreter').outputs if call.get('code_interpreter') else 'N/A'}")
+                print("***********Tool Call End***********")
+
+
     # [START get_messages_and_save_files]
     messages = agents_client.messages.list(thread_id=thread.id)
     print(f"Messages: {messages}")
@@ -109,3 +138,11 @@
 
     agents_client.delete_agent(agent.id)
     print("Deleted agent")
+
+    # Evaluate the run using the converter
+    converter = AIAgentConverter(project_client)
+    converted_output = converter.convert(thread_id=thread.id, run_id=run.id)
+
+    tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
+    response = tool_call_accuracy(**converted_output)
+    print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")
@@ -22,17 +22,28 @@
        the "Models + endpoints" tab in your Azure AI Foundry project.
 """
 
+import json
 import os
 from azure.ai.projects import AIProjectClient
+from azure.ai.evaluation import AIAgentConverter, ToolCallAccuracyEvaluator, AzureOpenAIModelConfiguration, GroundednessEvaluator
 from azure.ai.agents.models import (
     FileSearchTool,
     FilePurpose,
     ListSortOrder,
 )
 from azure.identity import DefaultAzureCredential
+from dotenv import load_dotenv
+load_dotenv()
 
 asset_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../assets/product_info_1.md"))
 
+model_config = AzureOpenAIModelConfiguration(
+    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+    api_key=os.environ["AZURE_OPENAI_API_KEY"],
+    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["MODEL_DEPLOYMENT_NAME"],
+)
+
 project_client = AIProjectClient(
     endpoint=os.environ["PROJECT_ENDPOINT"],
     credential=DefaultAzureCredential(),
@@ -76,6 +87,28 @@
     # Create and process agent run in thread with tools
     run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
     print(f"Run finished with status: {run.status}")
+
+    run_steps = agents_client.run_steps.list(thread_id=thread.id, run_id=run.id, include=["step_details.tool_calls[*].file_search.results[*].content"])
+    for step in run_steps:
+        print(f"Step {step['id']} status: {step['status']}")
+        step_details = step.get("step_details", {})
+        tool_calls = step_details.get("tool_calls", [])
+
+        if tool_calls:
+            print("  Tool calls:")
+            for call in tool_calls:
+                print(f"    Tool Call ID: {call.get('id')}")
+                print(f"    Type: {call.get('type')}")
+
+                file_search_details = call.get("file_search", {})
+                if file_search_details:
+                    print(f"    file_search ranking_options/inputs: {json.dumps(file_search_details.get('ranking_options').as_dict(), indent=6)}")
+                    print(f"    file_search results/outputs:")
+                    results = file_search_details.get('results', [])
+                    for i, result in enumerate(results, 1):
+                        print(f"      Result {i}: {json.dumps(result.as_dict(), indent=8)}")
+                        print()  # add line after each result
+        print()  # add an extra newline between steps
 
     if run.status == "failed":
         # Check if you got "Rate limit is exceeded.", then you want to get more quota
@@ -84,6 +117,7 @@
     # [START teardown]
     # Delete the file when done
     agents_client.vector_stores.delete(vector_store.id)
+    print("vector store id " + vector_store.id)
     print("Deleted vector store")
 
     agents_client.files.delete(file_id=file.id)
@@ -102,3 +136,16 @@
         if msg.text_messages:
             last_text = msg.text_messages[-1]
             print(f"{msg.role}: {last_text.text.value}")
+
+
+    # Evaluate the run using the converter
+    converter = AIAgentConverter(project_client)
+    converted_output = converter.convert(thread_id=thread.id, run_id=run.id)
+
+    tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)
+    response = tool_call_accuracy(**converted_output)
+    print(f"Tool call accuracy Response: {json.dumps(response, indent=4)}")
+
+    groundedness_evaluator = GroundednessEvaluator(model_config=model_config)
+    groundedness_response = groundedness_evaluator(query=converted_output["query"], context=converted_output["response"])
+    print(f"Groundedness Response: {json.dumps(groundedness_response, indent=4)}")
@@ -242,17 +242,32 @@ def _extract_typed_messages(ai_services_messages) -> List[Message]:
             # crash on one of the historical messages, let's check for it and bail out from this iteration.
             if len(single_turn.content) < 1:
                 continue
-
-            # Build the content of the text message.
-            content = {
-                "type": "text",
-                "text": single_turn.content[0].text.value,
-            }
+
+            content_list = []
+            # If content is a list, process all content items.
+            for content_item in single_turn.content:
+                if content_item.type == "text":
+                    content_list.append({
+                        "type": "text",
+                        "text": content_item.text.value,
+                    })
+                elif content_item.type == "image_file":
+                    content_list.append({
+                        "type": "image",
+                        "image": {
+                            "file_id": content_item.image_file.file_id,
+                        }
+                    })
+            # # Build the content of the text message.
+            # content = {
+            #     "type": "text",
+            #     "text": single_turn.content[0].text.value,
+            # }
 
             # If we have a user message, then we save it as such and since it's a human message, there is no
             # run_id associated with it.
             if single_turn.role == _USER:
-                final_messages.append(UserMessage(content=[content], createdAt=single_turn.created_at))
+                final_messages.append(UserMessage(content=content_list, createdAt=single_turn.created_at))
                 continue
 
             # In this case, we have an assistant message. Unfortunately, this would only have the user-facing
@@ -261,7 +276,7 @@ def _extract_typed_messages(ai_services_messages) -> List[Message]:
             if single_turn.role == _AGENT:
                 # We are required to put the run_id in the assistant message.
                 final_messages.append(
-                    AssistantMessage(content=[content], run_id=single_turn.run_id, createdAt=single_turn.created_at)
+                    AssistantMessage(content=content_list, run_id=single_turn.run_id, createdAt=single_turn.created_at)
                 )
                 continue
 
@@ -788,6 +803,7 @@ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
                 limit=self._AI_SERVICES_API_MAX_LIMIT,
                 order="asc",
                 after=after,
+                include=["step_details.tool_calls[*].file_search.results[*].content"]
             )
             has_more = run_steps.has_more
             after = run_steps.last_id
@@ -837,7 +853,8 @@ def _list_run_steps_chronological(self, thread_id: str, run_id: str):
                 thread_id=thread_id,
                 run_id=run_id,
                 limit=self._AI_SERVICES_API_MAX_LIMIT,
-                order="asc"
+                order="asc",
+                include=["step_details.tool_calls[*].file_search.results[*].content"]
             )
 
     def _list_run_ids_chronological(self, thread_id: str) -> List[str]:

@@ -43,8 +43,8 @@
     + "generate code, and create graphs and charts using your data. Supports "
     + "up to 20 files.",
     _BING_GROUNDING: "Enhance model output with web data.",
-    _FILE_SEARCH: "Search for data across uploaded files.",
-    _AZURE_AI_SEARCH: "Search an Azure AI Search index for relevant data.",
+    _FILE_SEARCH: "Lets agents access user-uploaded files (PDFs, Word, Excel, etc.) for information retrieval. Grounding responses in these files ensures answers are personalized and accurate.",
+    _AZURE_AI_SEARCH: "Enables agents to retrieve and ground responses in enterprise data indexed in Azure AI Search. This allows agents to provide accurate, context-aware answers based on internal knowledge bases.",
     _FABRIC_DATAAGENT: "Connect to Microsoft Fabric data agents to retrieve data across different data sources.",
 }
 
@@ -284,17 +284,18 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
             # Try to retrieve it, but if we don't find anything, skip adding the message
             # Just manually converting to dicts for easy serialization for now rather than custom serializers
             if tool_call.details.type == _CODE_INTERPRETER:
-                output = tool_call.details.code_interpreter.outputs
+                output = [result.as_dict() for result in tool_call.details.code_interpreter.outputs]
             elif tool_call.details.type == _BING_GROUNDING:
                 return messages  # not supported yet from bing grounding tool
             elif tool_call.details.type == _FILE_SEARCH:
                 output = [
-                    {
-                        "file_id": result.file_id,
-                        "file_name": result.file_name,
-                        "score": result.score,
-                        "content": result.content,
-                    }
+                    # {
+                    #     "file_id": result.file_id,
+                    #     "file_name": result.file_name,
+                    #     "score": result.score,
+                    #     "content": result.content,
+                    # }
+                    result.as_dict()
                     for result in tool_call.details.file_search.results
                 ]
             elif tool_call.details.type == _AZURE_AI_SEARCH:

@@ -278,7 +278,7 @@ def _is_applicable_tool(self, eval_input):
         if tool_definition is None or len(tool_definition) != 1:
             return False
         tool_type = tool_definition[0].get("type")
-        if tool_type is None or tool_type != "function":
+        if tool_type not in ("function", "file_search", "azure_ai_search", "bing_grounding", "code_interpreter"):
             return False
         return True