Skip to content

Commit 54d4599

Browse files
Copilot and slister1001
authored
Fix randomization seed consistency issue in azure-ai-evaluation SDK (#42047)
* Initial plan * Fix randomization seed consistency issue in azure-ai-evaluation SDK Co-authored-by: slister1001 <103153180+slister1001@users.noreply.github.com> * fix black --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: slister1001 <103153180+slister1001@users.noreply.github.com> Co-authored-by: Sydney Lister <sydneylister@microsoft.com>
1 parent efa468f commit 54d4599

File tree

5 files changed

+79
-3
lines changed

5 files changed

+79
-3
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_safety_evaluation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ async def callback(
290290
target=callback,
291291
text=source_text if source_text else "",
292292
concurrent_async_tasks=concurrent_async_tasks,
293+
randomization_seed=randomization_seed,
293294
)
294295

295296
## Run AdversarialSimulator

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,11 @@ async def __call__(
239239
# So randomize the selection instead of the parameter list directly,
240240
# or a potentially large deep copy.
241241
if randomization_seed is not None:
242-
random.seed(randomization_seed)
243-
random.shuffle(templates)
242+
# Create a local random instance to avoid polluting global state
243+
local_random = random.Random(randomization_seed)
244+
local_random.shuffle(templates)
245+
else:
246+
random.shuffle(templates)
244247

245248
# Prepare task parameters based on scenario - but use a single append call for all scenarios
246249
tasks = []

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
# noqa: E501
66
import asyncio
77
import logging
8-
from typing import Callable, cast, Union
8+
import random
9+
from typing import Callable, cast, Union, Optional
910

1011
from tqdm import tqdm
1112

@@ -105,6 +106,7 @@ async def __call__(
105106
api_call_retry_sleep_sec: int = 1,
106107
api_call_delay_sec: int = 0,
107108
concurrent_async_task: int = 3,
109+
randomization_seed: Optional[int] = None,
108110
**kwargs,
109111
):
110112
"""
@@ -130,6 +132,9 @@ async def __call__(
130132
:keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
131133
Defaults to 3.
132134
:paramtype concurrent_async_task: int
135+
:keyword randomization_seed: The seed used to shuffle the order of the templates. If unset, the
136+
templates are not shuffled and keep their original order. Defaults to None.
137+
:paramtype randomization_seed: Optional[int]
133138
:return: A list of dictionaries, each representing a simulated conversation. Each dictionary contains:
134139
135140
- 'template_parameters': A dictionary with parameters used in the conversation template,
@@ -190,6 +195,13 @@ async def __call__(
190195
ncols=100,
191196
unit="simulations",
192197
)
198+
199+
# Apply randomization to templates if seed is provided
200+
if randomization_seed is not None:
201+
# Create a local random instance to avoid polluting global state
202+
local_random = random.Random(randomization_seed)
203+
local_random.shuffle(templates)
204+
193205
for template in templates:
194206
for parameter in template.template_parameters:
195207
tasks.append(

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import importlib.resources as pkg_resources
88
import json
99
import os
10+
import random
1011
import re
1112
import warnings
1213
from typing import Any, Callable, Dict, List, Optional, Union, Tuple
@@ -104,6 +105,7 @@ async def __call__(
104105
user_simulator_prompty_options: Dict[str, Any] = {},
105106
conversation_turns: List[List[Union[str, Dict[str, Any]]]] = [],
106107
concurrent_async_tasks: int = 5,
108+
randomization_seed: Optional[int] = None,
107109
**kwargs,
108110
) -> List[JsonLineChatProtocol]:
109111
"""
@@ -134,6 +136,9 @@ async def __call__(
134136
:keyword concurrent_async_tasks: The number of asynchronous tasks to run concurrently during the simulation.
135137
Defaults to 5.
136138
:paramtype concurrent_async_tasks: int
139+
:keyword randomization_seed: The seed used to shuffle a copy of the task list. If unset, the
140+
tasks are not shuffled and keep their original order. Defaults to None.
141+
:paramtype randomization_seed: Optional[int]
137142
:return: A list of simulated conversations represented as JsonLineChatProtocol objects.
138143
:rtype: List[JsonLineChatProtocol]
139144
@@ -159,6 +164,13 @@ async def __call__(
159164
f"Only the first {num_queries} lines of the specified tasks will be simulated."
160165
)
161166

167+
# Apply randomization to tasks if seed is provided
168+
if randomization_seed is not None and tasks:
169+
# Create a local random instance to avoid polluting global state
170+
local_random = random.Random(randomization_seed)
171+
tasks = tasks.copy() # Don't modify the original list
172+
local_random.shuffle(tasks)
173+
162174
max_conversation_turns *= 2 # account for both user and assistant turns
163175

164176
prompty_model_config = self.model_config

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_safety_evaluation.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,51 @@ def test_validate_inputs_ungrounded_attributes_multi_turn(self, safety_eval, moc
347347
num_turns=3,
348348
)
349349
assert "Ungrounded attributes evaluation only supports single-turn conversations" in str(exc_info.value)
350+
351+
def test_randomization_seed_consistency(self):
352+
"""Test that the same randomization_seed produces consistent results across multiple invocations."""
353+
import random
354+
355+
# Test that local Random instances with same seed produce same results
356+
seed = 42
357+
test_data = [f"item_{i}" for i in range(20)]
358+
359+
# First run
360+
data1 = test_data.copy()
361+
rng1 = random.Random(seed)
362+
rng1.shuffle(data1)
363+
364+
# Second run with same seed (simulating separate invocation)
365+
data2 = test_data.copy()
366+
rng2 = random.Random(seed)
367+
rng2.shuffle(data2)
368+
369+
# Should produce identical results
370+
assert data1 == data2, "Same randomization_seed should produce identical results"
371+
372+
# Test that different seeds produce different results
373+
data3 = test_data.copy()
374+
rng3 = random.Random(123)
375+
rng3.shuffle(data3)
376+
377+
assert data1 != data3, "Different seeds should produce different results"
378+
379+
def test_local_random_no_global_state_pollution(self):
380+
"""Test that using local Random instances doesn't affect global random state."""
381+
import random
382+
383+
# Set global state
384+
random.seed(100)
385+
initial_value = random.random()
386+
387+
# Reset to same state
388+
random.seed(100)
389+
390+
# Use local random instance (simulating what our fixed simulators do)
391+
local_random = random.Random(42)
392+
local_random.shuffle([1, 2, 3, 4, 5])
393+
local_random.choice([1, 2, 3])
394+
395+
# Global state should be unchanged
396+
after_value = random.random()
397+
assert initial_value == after_value, "Local Random usage should not affect global state"

0 commit comments

Comments
 (0)