@@ -494,14 +494,17 @@ def _extract_text_from_content(content):
494
494
return text
495
495
496
496
497
- def _get_conversation_history (query ):
497
+ def _get_conversation_history (query , include_system_messages = False ):
498
498
all_user_queries = []
499
499
cur_user_query = []
500
500
all_agent_responses = []
501
501
cur_agent_response = []
502
+ system_message = None
502
503
for msg in query :
503
504
if not "role" in msg :
504
505
continue
506
+ if include_system_messages and msg ["role" ] == "system" and "content" in msg :
507
+ system_message = msg .get ("content" , "" )
505
508
if msg ["role" ] == "user" and "content" in msg :
506
509
if cur_agent_response != []:
507
510
all_agent_responses .append (cur_agent_response )
@@ -530,13 +533,18 @@ def _get_conversation_history(query):
530
533
category = ErrorCategory .INVALID_VALUE ,
531
534
blame = ErrorBlame .USER_ERROR ,
532
535
)
533
-
534
- return {"user_queries" : all_user_queries , "agent_responses" : all_agent_responses }
536
+ result = {"user_queries" : all_user_queries , "agent_responses" : all_agent_responses }
537
+ if include_system_messages :
538
+ result ["system_message" ] = system_message
539
+ return result
535
540
536
541
537
542
def _pretty_format_conversation_history (conversation_history ):
538
543
"""Formats the conversation history for better readability."""
539
544
formatted_history = ""
545
+ if "system_message" in conversation_history and conversation_history ["system_message" ] is not None :
546
+ formatted_history += "SYSTEM_PROMPT:\n "
547
+ formatted_history += " " + conversation_history ["system_message" ] + "\n \n "
540
548
for i , (user_query , agent_response ) in enumerate (
541
549
zip (conversation_history ["user_queries" ], conversation_history ["agent_responses" ] + [None ])
542
550
):
@@ -552,10 +560,10 @@ def _pretty_format_conversation_history(conversation_history):
552
560
return formatted_history
553
561
554
562
555
- def reformat_conversation_history (query , logger = None ):
563
+ def reformat_conversation_history (query , logger = None , include_system_messages = False ):
556
564
"""Reformats the conversation history to a more compact representation."""
557
565
try :
558
- conversation_history = _get_conversation_history (query )
566
+ conversation_history = _get_conversation_history (query , include_system_messages = include_system_messages )
559
567
return _pretty_format_conversation_history (conversation_history )
560
568
except :
561
569
# If the conversation history cannot be parsed for whatever reason (e.g. the converter format changed), the original query is returned
@@ -570,22 +578,53 @@ def reformat_conversation_history(query, logger=None):
570
578
return query
571
579
572
580
573
def _iter_content_items(content):
    """Return the dict-like items of a message ``content`` value.

    Plain-string or missing content has no structured items, so an empty list is returned
    instead of iterating characters (which would fail on ``.get``).
    """
    if content is None or isinstance(content, (str, bytes)):
        return []
    return [item for item in content if hasattr(item, "get")]


def _format_tool_call_arguments(args):
    """Render tool-call arguments as ``key="value"`` pairs joined by ``", "``."""
    import json  # function-scope import keeps this block self-contained

    if isinstance(args, str):
        # NOTE(review): arguments may arrive as a JSON-encoded string rather than a dict
        # (assumption -- confirm against the converter output). Decode when possible,
        # otherwise show the raw string rather than failing.
        try:
            decoded = json.loads(args)
        except ValueError:
            return args
        if not isinstance(decoded, dict):
            return args
        args = decoded
    if not isinstance(args, dict):
        return "" if args is None else str(args)
    return ", ".join(f'{k}="{v}"' for k, v in args.items())


def _get_agent_response(agent_response_msgs, include_tool_messages=False):
    """Extract the formatted agent response: text, and optionally tool calls/results.

    :param agent_response_msgs: Iterable of message dicts. Messages with role ``assistant``
        contribute text (via ``_extract_text_from_content``); messages with role ``tool`` and a
        ``tool_call_id`` contribute tool results.
    :param include_tool_messages: When True, each ``tool_call`` content item of an assistant
        message is appended as ``[TOOL_CALL] name(args)``, followed by the matching
        ``[TOOL_RESULT] ...`` line if a result with the same tool call id was found.
    :return: List of response lines in message order; empty if nothing could be extracted.
    """
    agent_response_text = []
    tool_results = {}

    # First pass: collect tool results keyed by tool call id, so that each result can be
    # emitted right after the call that produced it.
    if include_tool_messages:
        for msg in agent_response_msgs:
            if msg.get("role") != "tool" or "tool_call_id" not in msg:
                continue
            for content in _iter_content_items(msg.get("content")):
                if content.get("type") == "tool_result":
                    result = content.get("tool_result")
                    tool_results[msg["tool_call_id"]] = f"[TOOL_RESULT] {result}"

    # Second pass: parse assistant messages and tool calls
    for msg in agent_response_msgs:
        if not ("role" in msg and msg.get("role") == "assistant" and "content" in msg):
            continue
        text = _extract_text_from_content(msg["content"])
        if text:
            agent_response_text.extend(text)
        if not include_tool_messages:
            continue
        for content in _iter_content_items(msg["content"]):
            # Todo: Verify if this is the correct way to handle tool calls
            if content.get("type") != "tool_call":
                continue
            tool_call = content.get("tool_call")
            if hasattr(tool_call, "get") and "function" in tool_call:
                # Nested shape: {"tool_call": {"id": ..., "function": {"name": ..., "arguments": ...}}}
                function = tool_call.get("function") or {}
                func_name = function.get("name", "")
                args = function.get("arguments", {})
                tool_call_id = tool_call.get("id")
            else:
                # Flat shape: name/arguments/tool_call_id live directly on the content item
                tool_call_id = content.get("tool_call_id")
                func_name = content.get("name", "")
                args = content.get("arguments", {})
            args_str = _format_tool_call_arguments(args)
            agent_response_text.append(f"[TOOL_CALL] {func_name}({args_str})")
            if tool_call_id in tool_results:
                agent_response_text.append(tool_results[tool_call_id])

    return agent_response_text
582
621
583
622
584
- def reformat_agent_response (response , logger = None ):
623
+ def reformat_agent_response (response , logger = None , include_tool_messages = False ):
585
624
try :
586
625
if response is None or response == []:
587
626
return ""
588
- agent_response = _get_agent_response (response )
627
+ agent_response = _get_agent_response (response , include_tool_messages = include_tool_messages )
589
628
if agent_response == []:
590
629
# If no message could be extracted, likely the format changed, fallback to the original response in that case
591
630
if logger :
@@ -602,6 +641,26 @@ def reformat_agent_response(response, logger=None):
602
641
return response
603
642
604
643
644
def reformat_tool_definitions(tool_definitions, logger=None):
    """Reformat tool definitions into a compact, one-line-per-tool text listing.

    The output is a ``TOOL_DEFINITIONS:`` header followed by one
    ``- <name>: <description> (inputs: <parameter names>)`` line per tool.

    :param tool_definitions: Iterable of dict-like tool definitions. Each may provide ``name``
        (defaults to ``unnamed_tool``), ``description`` and ``parameters``; the keys of
        ``parameters["properties"]`` are listed as the tool's inputs.
    :param logger: Optional logger; receives a warning when the fallback is taken.
    :return: The formatted string, or ``tool_definitions`` unchanged if it could not be parsed.
    """
    try:
        output_lines = ["TOOL_DEFINITIONS:"]
        for tool in tool_definitions:
            name = tool.get("name", "unnamed_tool")
            # `or` covers keys that are present but explicitly None, which a plain
            # .get(key, default) would pass through and then fail on.
            desc = (tool.get("description") or "").strip()
            params = (tool.get("parameters") or {}).get("properties") or {}
            param_names = ", ".join(params) if params else "no parameters"
            output_lines.append(f"- {name}: {desc} (inputs: {param_names})")
        return "\n".join(output_lines)
    except Exception:
        # If the tool definitions cannot be parsed for whatever reason, the original tool definitions are returned
        # This is a deliberate best-effort fallback to ensure that the evaluation can still proceed.
        if logger:
            logger.warning(
                f"Tool definitions could not be parsed, falling back to original definitions: {tool_definitions}"
            )
        return tool_definitions
662
+
663
+
605
664
def upload (path : str , container_client : ContainerClient , logger = None ):
606
665
"""Upload files or directories to Azure Blob Storage using a container client.
607
666
0 commit comments