Skip to content

Commit d0f637e

Browse files
author
Salma Elshafey
committed
Removed 'applicable' field and print statement
1 parent 6c9e342 commit d0f637e

File tree

3 files changed

+2
-15
lines changed

3 files changed

+2
-15
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,6 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t
215215
f"{self._result_key}_result": score_result,
216216
f"{self._result_key}_threshold": self.threshold,
217217
f"{self._result_key}_reason": reason,
218-
'applicable': True,
219218
'per_tool_call_details': llm_output.get('additional_details', {}),
220219
self._EXCESS_TOOL_CALLS_KEY: llm_output.get(self._EXCESS_TOOL_CALLS_KEY, {}),
221220
self._MISSING_TOOL_CALLS_KEY: llm_output.get(self._MISSING_TOOL_CALLS_KEY, {}),
@@ -250,7 +249,7 @@ async def _real_call(self, **kwargs):
250249

251250
def _not_applicable_result(self, error_message):
252251
"""Return a result indicating that the tool call is not applicable for evaluation.
253-
252+
pr
254253
:param eval_input: The input to the evaluator.
255254
:type eval_input: Dict
256255
:return: A dictionary containing the result of the evaluation.
@@ -262,7 +261,6 @@ def _not_applicable_result(self, error_message):
262261
f"{self._result_key}_result": 'pass',
263262
f"{self._result_key}_threshold": self.threshold,
264263
f"{self._result_key}_reason": error_message,
265-
"applicable": False,
266264
"per_tool_call_details": {},
267265
self._EXCESS_TOOL_CALLS_KEY: {},
268266
self._MISSING_TOOL_CALLS_KEY: {},
@@ -280,7 +278,6 @@ def _parse_tools_from_response(self, response):
280278
tool_results_map = {}
281279
if isinstance(response, list):
282280
for message in response:
283-
print(message)
284281
# Extract tool calls from assistant messages
285282
if message.get("role") == "assistant" and isinstance(message.get("content"), list):
286283
for content_item in message.get("content"):

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_agent_evaluators.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
2626
}
2727
}]
2828
)
29-
assert not result["applicable"]
3029
assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
3130
assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
3231

@@ -42,7 +41,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
4241
}
4342
}]
4443
)
45-
assert not result["applicable"]
4644
assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
4745
assert ToolCallAccuracyEvaluator._NO_TOOL_DEFINITIONS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
4846

@@ -64,7 +62,6 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
6462
}
6563
}]
6664
)
67-
assert not result["applicable"]
6865
assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
6966
assert ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]
7067

@@ -90,6 +87,5 @@ def test_tool_call_accuracy_evaluator_missing_inputs(self, mock_model_config):
9087
}
9188
}]
9289
)
93-
assert not result["applicable"]
9490
assert result[ToolCallAccuracyEvaluator._RESULT_KEY] == ToolCallAccuracyEvaluator._NOT_APPLICABLE_RESULT
9591
assert ToolCallAccuracyEvaluator._TOOL_DEFINITIONS_MISSING_MESSAGE in result[f"{ToolCallAccuracyEvaluator._RESULT_KEY}_reason"]

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,6 @@ def test_evaluate_tools_valid1(self, mock_model_config):
104104
assert "per_tool_call_details" in result
105105
assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
106106
assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
107-
assert result["applicable"] is True
108107

109108
def test_evaluate_tools_valid2(self, mock_model_config):
110109
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -163,7 +162,6 @@ def test_evaluate_tools_valid2(self, mock_model_config):
163162
assert "per_tool_call_details" in result
164163
assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
165164
assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
166-
assert result["applicable"] is True
167165

168166
def test_evaluate_tools_valid3(self, mock_model_config):
169167
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -222,7 +220,6 @@ def test_evaluate_tools_valid3(self, mock_model_config):
222220
assert "per_tool_call_details" in result
223221
assert ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY in result
224222
assert ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY in result
225-
assert result["applicable"] is True
226223

227224
def test_evaluate_tools_one_eval_fails(self, mock_model_config):
228225
with pytest.raises(EvaluationException) as exc_info:
@@ -311,7 +308,6 @@ def test_evaluate_tools_some_not_applicable(self, mock_model_config):
311308
assert result["per_tool_call_details"] == {}
312309
assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
313310
assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
314-
assert result["applicable"] is False
315311

316312
def test_evaluate_tools_all_not_applicable(self, mock_model_config):
317313
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -351,7 +347,6 @@ def test_evaluate_tools_all_not_applicable(self, mock_model_config):
351347
assert result["per_tool_call_details"] == {}
352348
assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
353349
assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
354-
assert result["applicable"] is False
355350

356351
def test_evaluate_tools_no_tools(self, mock_model_config):
357352
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
@@ -383,5 +378,4 @@ def test_evaluate_tools_no_tools(self, mock_model_config):
383378
assert result[f"{key}_reason"] == ToolCallAccuracyEvaluator._NO_TOOL_CALLS_MESSAGE
384379
assert result["per_tool_call_details"] == {}
385380
assert result[ToolCallAccuracyEvaluator._EXCESS_TOOL_CALLS_KEY] == {}
386-
assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}
387-
assert result["applicable"] is False
381+
assert result[ToolCallAccuracyEvaluator._MISSING_TOOL_CALLS_KEY] == {}

0 commit comments

Comments
 (0)