Reformat base eval using Black

Salma Elshafey · Salma Elshafey · commit c98359303369 · 2025-07-16T13:43:45.000+03:00
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py
@@ -4,14 +4,34 @@
 
 import inspect
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Generic, List, TypedDict, TypeVar, Union, cast, final, Optional
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Generic,
+    List,
+    TypedDict,
+    TypeVar,
+    Union,
+    cast,
+    final,
+    Optional,
+)
 
 from azure.ai.evaluation._legacy._adapters.utils import async_run_allowing_running_loop
 from typing_extensions import ParamSpec, TypeAlias, get_overloads
 
-from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
+from azure.ai.evaluation._exceptions import (
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+    EvaluationException,
+)
 from azure.ai.evaluation._common.utils import remove_optional_singletons
-from azure.ai.evaluation._constants import _AggregationType, EVALUATION_PASS_FAIL_MAPPING
+from azure.ai.evaluation._constants import (
+    _AggregationType,
+    EVALUATION_PASS_FAIL_MAPPING,
+)
 from azure.ai.evaluation._model_configurations import Conversation
 from azure.ai.evaluation._common._experimental import experimental
 
@@ -101,14 +121,18 @@ def __init__(
         not_singleton_inputs: List[str] = ["conversation", "kwargs"],
         eval_last_turn: bool = False,
         conversation_aggregation_type: _AggregationType = _AggregationType.MEAN,
-        conversation_aggregator_override: Optional[Callable[[List[float]], float]] = None,
+        conversation_aggregator_override: Optional[
+            Callable[[List[float]], float]
+        ] = None,
         _higher_is_better: Optional[bool] = True,
     ):
         self._not_singleton_inputs = not_singleton_inputs
         self._eval_last_turn = eval_last_turn
         self._singleton_inputs = self._derive_singleton_inputs()
         self._async_evaluator = AsyncEvaluatorBase(self._real_call)
-        self._conversation_aggregation_function = GetAggregator(conversation_aggregation_type)
+        self._conversation_aggregation_function = GetAggregator(
+            conversation_aggregation_type
+        )
         self._higher_is_better = _higher_is_better
         self._threshold = threshold
         if conversation_aggregator_override is not None:
@@ -170,13 +194,18 @@ def _derive_singleton_inputs(self) -> List[str]:
         singletons = []
         for call_signature in call_signatures:
             params = call_signature.parameters
-            if any(not_singleton_input in params for not_singleton_input in self._not_singleton_inputs):
+            if any(
+                not_singleton_input in params
+                for not_singleton_input in self._not_singleton_inputs
+            ):
                 continue
             # exclude self since it is not a singleton input
             singletons.extend([p for p in params if p != "self"])
         return singletons
 
-    def _derive_conversation_converter(self) -> Callable[[Dict], List[DerivedEvalInput]]:
+    def _derive_conversation_converter(
+        self,
+    ) -> Callable[[Dict], List[DerivedEvalInput]]:
         """Produce the function that will be used to convert conversations to a list of evaluable inputs.
         This uses the inputs derived from the _derive_singleton_inputs function to determine which
         aspects of a conversation ought to be extracted.
@@ -235,7 +264,9 @@ def converter(conversation: Dict) -> List[DerivedEvalInput]:
 
         return converter
 
-    def _derive_multi_modal_conversation_converter(self) -> Callable[[Dict], List[Dict[str, Any]]]:
+    def _derive_multi_modal_conversation_converter(
+        self,
+    ) -> Callable[[Dict], List[Dict[str, Any]]]:
         """Produce the function that will be used to convert multi-modal conversations to a list of evaluable inputs.
         This uses the inputs derived from the _derive_singleton_inputs function to determine which
         aspects of a conversation ought to be extracted.
@@ -269,12 +300,16 @@ def multi_modal_converter(conversation: Dict) -> List[Dict[str, Any]]:
             if len(user_messages) != len(assistant_messages):
                 raise EvaluationException(
                     message="Mismatched number of user and assistant messages.",
-                    internal_message=("Mismatched number of user and assistant messages."),
+                    internal_message=(
+                        "Mismatched number of user and assistant messages."
+                    ),
                 )
             if len(assistant_messages) > 1:
                 raise EvaluationException(
                     message="Conversation can have only one assistant message.",
-                    internal_message=("Conversation can have only one assistant message."),
+                    internal_message=(
+                        "Conversation can have only one assistant message."
+                    ),
                 )
             eval_conv_inputs = []
             for user_msg, assist_msg in zip(user_messages, assistant_messages):
@@ -283,12 +318,16 @@ def multi_modal_converter(conversation: Dict) -> List[Dict[str, Any]]:
                     conv_messages.append(system_messages[0])
                 conv_messages.append(user_msg)
                 conv_messages.append(assist_msg)
-                eval_conv_inputs.append({"conversation": Conversation(messages=conv_messages)})
+                eval_conv_inputs.append(
+                    {"conversation": Conversation(messages=conv_messages)}
+                )
             return eval_conv_inputs
 
         return multi_modal_converter
 
-    def _convert_kwargs_to_eval_input(self, **kwargs) -> Union[List[Dict], List[DerivedEvalInput], Dict[str, Any]]:
+    def _convert_kwargs_to_eval_input(
+        self, **kwargs
+    ) -> Union[List[Dict], List[DerivedEvalInput], Dict[str, Any]]:
         """Convert an arbitrary input into a list of inputs for evaluators.
         It is assumed that evaluators generally make use of their inputs in one of two ways.
         Either they receive a collection of keyname inputs that are all single values
@@ -353,11 +392,17 @@ def _is_multi_modal_conversation(self, conversation: Dict) -> bool:
             if "content" in message:
                 content = message.get("content", "")
                 if isinstance(content, list):
-                    if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
+                    if any(
+                        item.get("type") == "image_url"
+                        and "url" in item.get("image_url", {})
+                        for item in content
+                    ):
                         return True
         return False
 
-    def _aggregate_results(self, per_turn_results: List[DoEvalResult[T_EvalValue]]) -> AggregateResult[T_EvalValue]:
+    def _aggregate_results(
+        self, per_turn_results: List[DoEvalResult[T_EvalValue]]
+    ) -> AggregateResult[T_EvalValue]:
         """Aggregate the evaluation results of each conversation turn into a single result.
 
         Exact implementation might need to vary slightly depending on the results produced.
@@ -387,12 +432,16 @@ def _aggregate_results(self, per_turn_results: List[DoEvalResult[T_EvalValue]])
         # Find and average all numeric values
         for metric, values in evaluation_per_turn.items():
             if all(isinstance(value, (int, float)) for value in values):
-                aggregated[metric] = self._conversation_aggregation_function(cast(List[Union[int, float]], values))
+                aggregated[metric] = self._conversation_aggregation_function(
+                    cast(List[Union[int, float]], values)
+                )
         # Slap the per-turn results back in.
         aggregated["evaluation_per_turn"] = evaluation_per_turn
         return aggregated
 
-    async def _real_call(self, **kwargs) -> Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]:
+    async def _real_call(
+        self, **kwargs
+    ) -> Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]:
         """The asynchronous call where real end-to-end evaluation logic is performed.
 
         :keyword kwargs: The inputs to evaluate.
@@ -445,7 +494,9 @@ def _to_async(self) -> "AsyncEvaluatorBase":
 
     @experimental
     @final
-    def _set_conversation_aggregation_type(self, conversation_aggregation_type: _AggregationType) -> None:
+    def _set_conversation_aggregation_type(
+        self, conversation_aggregation_type: _AggregationType
+    ) -> None:
         """Input a conversation aggregation type to re-assign the aggregator function used by this evaluator for
         multi-turn conversations. This aggregator is used to combine numeric outputs from each evaluation of a
         multi-turn conversation into a single top-level result.
@@ -454,11 +505,15 @@ def _set_conversation_aggregation_type(self, conversation_aggregation_type: _Agg
             results of a conversation to produce a single result.
         :type conversation_aggregation_type: ~azure.ai.evaluation._AggregationType
         """
-        self._conversation_aggregation_function = GetAggregator(conversation_aggregation_type)
+        self._conversation_aggregation_function = GetAggregator(
+            conversation_aggregation_type
+        )
 
     @experimental
     @final
-    def _set_conversation_aggregator(self, aggregator: Callable[[List[float]], float]) -> None:
+    def _set_conversation_aggregator(
+        self, aggregator: Callable[[List[float]], float]
+    ) -> None:
         """Set the conversation aggregator function directly. This function will be applied to all numeric outputs
         of an evaluator when it evaluates a conversation with multiple-turns thus ends up with multiple results per
         evaluation that is needs to coalesce into a single result. Use when built-in aggregators do not
@@ -488,7 +543,9 @@ class AsyncEvaluatorBase:
     to ensure that no one ever needs to extend or otherwise modify this class directly.
     """
 
-    def __init__(self, real_call):  # DO NOT ADD TYPEHINT PROMPT FLOW WILL SCREAM AT YOU ABOUT META GENERATION
+    def __init__(
+        self, real_call
+    ):  # DO NOT ADD TYPEHINT PROMPT FLOW WILL SCREAM AT YOU ABOUT META GENERATION
         self._real_call = real_call
 
     # Don't look at my shame. Nothing to see here....