Fix randomization seed consistency issue in azure-ai-evaluation SDK (#42047)

Copilot · slister1001 · web-flow · commit 54d45994af1e · 2025-07-17T15:57:02.000-04:00
* Initial plan

* Fix randomization seed consistency issue in azure-ai-evaluation SDK

Co-authored-by: slister1001 <103153180+slister1001@users.noreply.github.com>

* fix black

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: slister1001 <103153180+slister1001@users.noreply.github.com>
Co-authored-by: Sydney Lister <sydneylister@microsoft.com>
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py
@@ -290,6 +290,7 @@ async def callback(
                 target=callback,
                 text=source_text if source_text else "",
                 concurrent_async_tasks=concurrent_async_tasks,
+                randomization_seed=randomization_seed,
             )
 
         ## Run AdversarialSimulator
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -239,8 +239,11 @@ async def __call__(
             # So randomize a the selection instead of the parameter list directly,
             # or a potentially large deep copy.
             if randomization_seed is not None:
-                random.seed(randomization_seed)
-            random.shuffle(templates)
+                # Create a local random instance to avoid polluting global state
+                local_random = random.Random(randomization_seed)
+                local_random.shuffle(templates)
+            else:
+                random.shuffle(templates)
 
         # Prepare task parameters based on scenario - but use a single append call for all scenarios
         tasks = []
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py
@@ -5,7 +5,8 @@
 # noqa: E501
 import asyncio
 import logging
-from typing import Callable, cast, Union
+import random
+from typing import Callable, cast, Union, Optional
 
 from tqdm import tqdm
 
@@ -105,6 +106,7 @@ async def __call__(
         api_call_retry_sleep_sec: int = 1,
         api_call_delay_sec: int = 0,
         concurrent_async_task: int = 3,
+        randomization_seed: Optional[int] = None,
         **kwargs,
     ):
         """
@@ -130,6 +132,9 @@ async def __call__(
         :keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
             Defaults to 3.
         :paramtype concurrent_async_task: int
+        :keyword randomization_seed: Seed used to shuffle the template order deterministically. If unset,
+            the templates are not shuffled and keep their original order. Defaults to None.
+        :paramtype randomization_seed: Optional[int]
         :return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
 
          - 'template_parameters': A dictionary with parameters used in the conversation template,
@@ -190,6 +195,13 @@ async def __call__(
             ncols=100,
             unit="simulations",
         )
+
+        # Apply randomization to templates if seed is provided
+        if randomization_seed is not None:
+            # Create a local random instance to avoid polluting global state
+            local_random = random.Random(randomization_seed)
+            local_random.shuffle(templates)
+
         for template in templates:
             for parameter in template.template_parameters:
                 tasks.append(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py
@@ -7,6 +7,7 @@
 import importlib.resources as pkg_resources
 import json
 import os
+import random
 import re
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Union, Tuple
@@ -104,6 +105,7 @@ async def __call__(
         user_simulator_prompty_options: Dict[str, Any] = {},
         conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [],
         concurrent_async_tasks: int = 5,
+        randomization_seed: Optional[int] = None,
         **kwargs,
     ) -> List[JsonLineChatProtocol]:
         """
@@ -134,6 +136,9 @@ async def __call__(
         :keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
             Defaults to 5.
         :paramtype concurrent_async_tasks: int
+        :keyword randomization_seed: Seed used to shuffle a copy of the task list deterministically. If unset,
+            the tasks are not shuffled and keep their original order. Defaults to None.
+        :paramtype randomization_seed: Optional[int]
         :return: A list of simulated conversations represented as JsonLineChatProtocol objects.
         :rtype: List[JsonLineChatProtocol]
 
@@ -159,6 +164,13 @@ async def __call__(
                 f"Only the first {num_queries} lines of the specified tasks will be simulated."
             )
 
+        # Apply randomization to tasks if seed is provided
+        if randomization_seed is not None and tasks:
+            # Create a local random instance to avoid polluting global state
+            local_random = random.Random(randomization_seed)
+            tasks = tasks.copy()  # Don't modify the original list
+            local_random.shuffle(tasks)
+
         max_conversation_turns *= 2  # account for both user and assistant turns
 
         prompty_model_config = self.model_config
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py
@@ -347,3 +347,51 @@ def test_validate_inputs_ungrounded_attributes_multi_turn(self, safety_eval, moc
                 num_turns=3,
             )
         assert "Ungrounded attributes evaluation only supports single-turn conversations" in str(exc_info.value)
+
+    def test_randomization_seed_consistency(self):
+        """Test that the same randomization_seed produces consistent results across multiple invocations."""
+        import random
+
+        # Test that local Random instances with same seed produce same results
+        seed = 42
+        test_data = [f"item_{i}" for i in range(20)]
+
+        # First run
+        data1 = test_data.copy()
+        rng1 = random.Random(seed)
+        rng1.shuffle(data1)
+
+        # Second run with same seed (simulating separate invocation)
+        data2 = test_data.copy()
+        rng2 = random.Random(seed)
+        rng2.shuffle(data2)
+
+        # Should produce identical results
+        assert data1 == data2, "Same randomization_seed should produce identical results"
+
+        # Test that different seeds produce different results
+        data3 = test_data.copy()
+        rng3 = random.Random(123)
+        rng3.shuffle(data3)
+
+        assert data1 != data3, "Different seeds should produce different results"
+
+    def test_local_random_no_global_state_pollution(self):
+        """Test that using local Random instances doesn't affect global random state."""
+        import random
+
+        # Set global state
+        random.seed(100)
+        initial_value = random.random()
+
+        # Reset to same state
+        random.seed(100)
+
+        # Use local random instance (simulating what our fixed simulators do)
+        local_random = random.Random(42)
+        local_random.shuffle([1, 2, 3, 4, 5])
+        local_random.choice([1, 2, 3])
+
+        # Global state should be unchanged
+        after_value = random.random()
+        assert initial_value == after_value, "Local Random usage should not affect global state"

Original file line number	Diff line number	Diff line change
`@@ -290,6 +290,7 @@ async def callback(`
`290`	`290`	`target=callback,`
`291`	`291`	`text=source_text if source_text else "",`
`292`	`292`	`concurrent_async_tasks=concurrent_async_tasks,`
	`293`	`+ randomization_seed=randomization_seed,`
`293`	`294`	`)`
`294`	`295`
`295`	`296`	`## Run AdversarialSimulator`