Removed 'applicable' field and print statement

Salma Elshafey · Salma Elshafey · commit d0f637ea0869 · 2025-06-30T23:56:23.000+03:00
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py
@@ -215,7 +215,6 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:  # t
                 f"{self._result_key}_result": score_result,
                 f"{self._result_key}_threshold": self.threshold,
                 f"{self._result_key}_reason": reason,
-                'applicable': True,
                 'per_tool_call_details': llm_output.get('additional_details', {}),
                 self._EXCESS_TOOL_CALLS_KEY: llm_output.get(self._EXCESS_TOOL_CALLS_KEY, {}),
                 self._MISSING_TOOL_CALLS_KEY: llm_output.get(self._MISSING_TOOL_CALLS_KEY, {}),
@@ -250,7 +249,7 @@ async def _real_call(self, **kwargs):
     
     def _not_applicable_result(self, error_message):
         """Return a result indicating that the tool call is not applicable for evaluation.
 
         :param eval_input: The input to the evaluator.
         :type eval_input: Dict
         :return: A dictionary containing the result of the evaluation.
@@ -262,7 +261,6 @@ def _not_applicable_result(self, error_message):
             f"{self._result_key}_result": 'pass',
             f"{self._result_key}_threshold": self.threshold,
             f"{self._result_key}_reason": error_message,
-            "applicable": False,
             "per_tool_call_details": {},
             self._EXCESS_TOOL_CALLS_KEY: {},
             self._MISSING_TOOL_CALLS_KEY: {},
@@ -280,7 +278,6 @@ def _parse_tools_from_response(self, response):
         tool_results_map = {}
         if isinstance(response, list):
             for message in response:                
-                print(message)
                 # Extract tool calls from assistant messages
                 if message.get("role") == "assistant" and isinstance(message.get("content"), list):
                     for content_item in message.get("content"):
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py
@@ -26,7 +26,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
                 }
             }]
         )
-        assert not result["applicable"]
         assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
         assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
 
@@ -42,7 +41,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
                 }
             }]
         )
-        assert not result["applicable"]
         assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
         assert ToolCallAccuracyEvaluator._NO_TOOL_DEFINITIONS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
 
@@ -64,7 +62,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
                 }
             }]
         )
-        assert not result["applicable"]
         assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
         assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
 
@@ -90,6 +87,5 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
                 }
             }]
         )
-        assert not result["applicable"]
         assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
         assert ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py
@@ -104,7 +104,6 @@ def test_evaluate_tools_valid1(self, mock_model_config):
         assert "per_tool_call_details" in result
         assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
         assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
-        assert result["applicable"] is True
 
     def test_evaluate_tools_valid2(self, mock_model_config):
         evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -163,7 +162,6 @@ def test_evaluate_tools_valid2(self, mock_model_config):
         assert "per_tool_call_details" in result
         assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
         assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
-        assert result["applicable"] is True
 
     def test_evaluate_tools_valid3(self, mock_model_config):
         evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -222,7 +220,6 @@ def test_evaluate_tools_valid3(self, mock_model_config):
         assert "per_tool_call_details" in result
         assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
         assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
-        assert result["applicable"] is True
 
     def test_evaluate_tools_one_eval_fails(self, mock_model_config):
         with pytest.raises(EvaluationException) as exc_info:
@@ -311,7 +308,6 @@ def test_evaluate_tools_some_not_applicable(self, mock_model_config):
         assert result["per_tool_call_details"] == {}
         assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
         assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
-        assert result["applicable"] is False
 
     def test_evaluate_tools_all_not_applicable(self, mock_model_config):
         evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -351,7 +347,6 @@ def test_evaluate_tools_all_not_applicable(self, mock_model_config):
         assert result["per_tool_call_details"] == {}
         assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
         assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
-        assert result["applicable"] is False
 
     def test_evaluate_tools_no_tools(self, mock_model_config):
         evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -383,5 +378,4 @@ def test_evaluate_tools_no_tools(self, mock_model_config):
         assert result[f"{key}_reason"] == ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE
         assert result["per_tool_call_details"] == {}
         assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
-        assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
-        assert result["applicable"] is False
+        assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):`
`26`	`26`	`}`
`27`	`27`	`}]`
`28`	`28`	`)`
`29`		`- assert not result["applicable"]`
`30`	`29`	`assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT`
`31`	`30`	`assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]`
`32`	`31`
`@@ -42,7 +41,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):`
`42`	`41`	`}`
`43`	`42`	`}]`
`44`	`43`	`)`
`45`		`- assert not result["applicable"]`
`46`	`44`	`assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT`
`47`	`45`	`assert ToolCallAccuracyEvaluator._NO_TOOL_DEFINITIONS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]`
`48`	`46`
`@@ -64,7 +62,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):`
`64`	`62`	`}`
`65`	`63`	`}]`
`66`	`64`	`)`
`67`		`- assert not result["applicable"]`
`68`	`65`	`assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT`
`69`	`66`	`assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]`
`70`	`67`
`@@ -90,6 +87,5 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):`
`90`	`87`	`}`
`91`	`88`	`}]`
`92`	`89`	`)`
`93`		`- assert not result["applicable"]`
`94`	`90`	`assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT`
`95`	`91`	`assert ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]`