@@ -500,6 +500,8 @@ def __init__(self, algo_class: Optional[type] = None):
500
500
self .evaluation_duration = 10
501
501
self .evaluation_duration_unit = "episodes"
502
502
self .evaluation_sample_timeout_s = 120.0
503
+ self .evaluation_auto_duration_min_env_steps_per_sample = 100
504
+ self .evaluation_auto_duration_max_env_steps_per_sample = 2000
503
505
self .evaluation_parallel_to_training = False
504
506
self .evaluation_force_reset_envs_before_iteration = True
505
507
self .evaluation_config = None
@@ -2559,6 +2561,8 @@ def evaluation(
2559
2561
evaluation_interval : Optional [int ] = NotProvided ,
2560
2562
evaluation_duration : Optional [Union [int , str ]] = NotProvided ,
2561
2563
evaluation_duration_unit : Optional [str ] = NotProvided ,
2564
+ evaluation_auto_duration_min_env_steps_per_sample : Optional [int ] = NotProvided ,
2565
+ evaluation_auto_duration_max_env_steps_per_sample : Optional [int ] = NotProvided ,
2562
2566
evaluation_sample_timeout_s : Optional [float ] = NotProvided ,
2563
2567
evaluation_parallel_to_training : Optional [bool ] = NotProvided ,
2564
2568
evaluation_force_reset_envs_before_iteration : Optional [bool ] = NotProvided ,
@@ -2597,6 +2601,14 @@ def evaluation(
2597
2601
evaluation_duration_unit: The unit, with which to count the evaluation
2598
2602
duration. Either "episodes" (default) or "timesteps". Note that this
2599
2603
setting is ignored if `evaluation_duration="auto"`.
2604
+ evaluation_auto_duration_min_env_steps_per_sample: If `evaluation_duration`
2605
+ is "auto" (in which case `evaluation_duration_unit` is always
2606
+ "timesteps"), the minimum number of timesteps to sample per remote
2607
+ `sample()` call.
2608
+ evaluation_auto_duration_max_env_steps_per_sample: If `evaluation_duration`
2609
+ is "auto" (in which case `evaluation_duration_unit` is always
2610
+ "timesteps"), the maximum number of timesteps to sample per remote
2611
+ `sample()` call.
2600
2612
evaluation_sample_timeout_s: The timeout (in seconds) for evaluation workers
2601
2613
to sample a complete episode in the case your config settings are:
2602
2614
`evaluation_duration != "auto"` and `evaluation_duration_unit="episodes"`.
@@ -2685,6 +2697,14 @@ def evaluation(
2685
2697
self .evaluation_duration = evaluation_duration
2686
2698
if evaluation_duration_unit is not NotProvided :
2687
2699
self .evaluation_duration_unit = evaluation_duration_unit
2700
+ if evaluation_auto_duration_min_env_steps_per_sample is not NotProvided :
2701
+ self .evaluation_auto_duration_min_env_steps_per_sample = (
2702
+ evaluation_auto_duration_min_env_steps_per_sample
2703
+ )
2704
+ if evaluation_auto_duration_max_env_steps_per_sample is not NotProvided :
2705
+ self .evaluation_auto_duration_max_env_steps_per_sample = (
2706
+ evaluation_auto_duration_max_env_steps_per_sample
2707
+ )
2688
2708
if evaluation_sample_timeout_s is not NotProvided :
2689
2709
self .evaluation_sample_timeout_s = evaluation_sample_timeout_s
2690
2710
if evaluation_parallel_to_training is not NotProvided :
0 commit comments