Commit 7eb8d91 (1 parent: 5211801)

[RLlib] Make min/max env steps per evaluation sample call configurable for duration="auto". (#51637)
File tree (3 files changed: 26 additions, 24 deletions)

rllib/algorithms/algorithm.py
rllib/algorithms/algorithm_config.py
rllib/utils/metrics/__init__.py

rllib/algorithms/algorithm.py (5 additions, 23 deletions)

@@ -131,7 +131,6 @@
     NUM_AGENT_STEPS_TRAINED_LIFETIME,
     NUM_ENV_STEPS_SAMPLED,
     NUM_ENV_STEPS_SAMPLED_LIFETIME,
-    NUM_ENV_STEPS_SAMPLED_PER_SECOND,
     NUM_ENV_STEPS_SAMPLED_THIS_ITER,
     NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER,
     NUM_ENV_STEPS_TRAINED,
@@ -1331,12 +1330,10 @@ def _env_runner_remote(worker, num, round, iter):
             # Compute rough number of timesteps it takes for a single EnvRunner
             # to occupy the estimated (parallelly running) train step.
             _num = min(
-                # Cap at 20k to not put too much memory strain on EnvRunners.
-                20000,
+                # Clamp number of steps to take between a max and a min.
+                self.config.evaluation_auto_duration_max_env_steps_per_sample,
                 max(
-                    # Low-cap at 100 to avoid possibly negative rollouts or very
-                    # short ones.
-                    100,
+                    self.config.evaluation_auto_duration_min_env_steps_per_sample,
                     (
                         # How much time do we have left?
                         (train_mean_time - (time.time() - t0))
@@ -1346,8 +1343,9 @@ def _env_runner_remote(worker, num, round, iter):
                         (
                             EVALUATION_RESULTS,
                             ENV_RUNNER_RESULTS,
-                            NUM_ENV_STEPS_SAMPLED_PER_SECOND,
+                            NUM_ENV_STEPS_SAMPLED_LIFETIME,
                         ),
+                        throughput=True,
                         default=0.0,
                     )
                     / num_healthy_workers
@@ -3088,22 +3086,6 @@ def _run_one_evaluation(
             eval_results = self.evaluate(
                 parallel_train_future=parallel_train_future
             )
-            # TODO (sven): Properly support throughput/sec measurements within
-            # `self.metrics.log_time()` call.
-            self.metrics.log_value(
-                key=(
-                    EVALUATION_RESULTS,
-                    ENV_RUNNER_RESULTS,
-                    NUM_ENV_STEPS_SAMPLED_PER_SECOND,
-                ),
-                value=(
-                    eval_results.get(ENV_RUNNER_RESULTS, {}).get(
-                        NUM_ENV_STEPS_SAMPLED, 0
-                    )
-                    / self.metrics.peek((TIMERS, EVALUATION_ITERATION_TIMER))
-                ),
-            )
-
         else:
             with self._timers[EVALUATION_ITERATION_TIMER]:
                 eval_results = self.evaluate(
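
In words: for evaluation_duration="auto", each parallel evaluation round asks every healthy eval EnvRunner for roughly (remaining train time) x (sampling throughput) / (number of healthy workers) env steps, now clamped between the two new config values instead of the previously hard-coded 100 and 20000. Below is a minimal standalone sketch of that clamp, under the assumption that the multiplication connects the two hunks shown above; the function name and plain-number inputs are illustrative only, since in RLlib this expression runs inline inside Algorithm.evaluate():

    # Illustrative sketch of the new clamping arithmetic (not RLlib API; the
    # defaults 100 and 2000 are the ones added to AlgorithmConfig in this commit).
    def estimate_env_steps_per_sample(
        time_left_s: float,        # train_mean_time - (time.time() - t0)
        env_steps_per_s: float,    # throughput of NUM_ENV_STEPS_SAMPLED_LIFETIME
        num_healthy_workers: int,
        min_steps: int = 100,      # evaluation_auto_duration_min_env_steps_per_sample
        max_steps: int = 2000,     # evaluation_auto_duration_max_env_steps_per_sample
    ) -> int:
        # Per-worker share of the remaining time budget, converted to env steps,
        # then clamped into [min_steps, max_steps].
        raw = time_left_s * env_steps_per_s / max(num_healthy_workers, 1)
        return int(min(max_steps, max(min_steps, raw)))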

rllib/algorithms/algorithm_config.py (20 additions, 0 deletions)

@@ -500,6 +500,8 @@ def __init__(self, algo_class: Optional[type] = None):
         self.evaluation_duration = 10
         self.evaluation_duration_unit = "episodes"
         self.evaluation_sample_timeout_s = 120.0
+        self.evaluation_auto_duration_min_env_steps_per_sample = 100
+        self.evaluation_auto_duration_max_env_steps_per_sample = 2000
         self.evaluation_parallel_to_training = False
         self.evaluation_force_reset_envs_before_iteration = True
         self.evaluation_config = None
@@ -2559,6 +2561,8 @@ def evaluation(
         evaluation_interval: Optional[int] = NotProvided,
         evaluation_duration: Optional[Union[int, str]] = NotProvided,
         evaluation_duration_unit: Optional[str] = NotProvided,
+        evaluation_auto_duration_min_env_steps_per_sample: Optional[int] = NotProvided,
+        evaluation_auto_duration_max_env_steps_per_sample: Optional[int] = NotProvided,
         evaluation_sample_timeout_s: Optional[float] = NotProvided,
         evaluation_parallel_to_training: Optional[bool] = NotProvided,
         evaluation_force_reset_envs_before_iteration: Optional[bool] = NotProvided,
@@ -2597,6 +2601,14 @@ def evaluation(
             evaluation_duration_unit: The unit, with which to count the evaluation
                 duration. Either "episodes" (default) or "timesteps". Note that this
                 setting is ignored if `evaluation_duration="auto"`.
+            evaluation_auto_duration_min_env_steps_per_sample: If `evaluation_duration`
+                is "auto" (in which case `evaluation_duration_unit` is always
+                "timesteps"), at least how many timesteps should be done per remote
+                `sample()` call.
+            evaluation_auto_duration_max_env_steps_per_sample: If `evaluation_duration`
+                is "auto" (in which case `evaluation_duration_unit` is always
+                "timesteps"), at most how many timesteps should be done per remote
+                `sample()` call.
             evaluation_sample_timeout_s: The timeout (in seconds) for evaluation workers
                 to sample a complete episode in the case your config settings are:
                 `evaluation_duration != auto` and `evaluation_duration_unit=episode`.
@@ -2685,6 +2697,14 @@ def evaluation(
             self.evaluation_duration = evaluation_duration
         if evaluation_duration_unit is not NotProvided:
             self.evaluation_duration_unit = evaluation_duration_unit
+        if evaluation_auto_duration_min_env_steps_per_sample is not NotProvided:
+            self.evaluation_auto_duration_min_env_steps_per_sample = (
+                evaluation_auto_duration_min_env_steps_per_sample
+            )
+        if evaluation_auto_duration_max_env_steps_per_sample is not NotProvided:
+            self.evaluation_auto_duration_max_env_steps_per_sample = (
+                evaluation_auto_duration_max_env_steps_per_sample
+            )
         if evaluation_sample_timeout_s is not NotProvided:
             self.evaluation_sample_timeout_s = evaluation_sample_timeout_s
         if evaluation_parallel_to_training is not NotProvided:
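
The two new settings are exposed through AlgorithmConfig.evaluation(). A usage sketch follows; the PPO algorithm, the environment, and the concrete values are illustrative, and only the two evaluation_auto_duration_* arguments are what this commit adds:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .evaluation(
            evaluation_interval=1,
            # duration="auto" runs evaluation in parallel to training and
            # sizes each round to roughly match one training step.
            evaluation_duration="auto",
            evaluation_parallel_to_training=True,
            # New in this commit: bound how many env steps each remote
            # sample() call may take (defaults: 100 and 2000).
            evaluation_auto_duration_min_env_steps_per_sample=200,
            evaluation_auto_duration_max_env_steps_per_sample=4000,
        )
    )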

rllib/utils/metrics/__init__.py (1 addition, 1 deletion)

@@ -26,7 +26,7 @@
 NUM_AGENT_STEPS_SAMPLED_THIS_ITER = "num_agent_steps_sampled_this_iter"  # @OldAPIStack
 NUM_ENV_STEPS_SAMPLED = "num_env_steps_sampled"
 NUM_ENV_STEPS_SAMPLED_LIFETIME = "num_env_steps_sampled_lifetime"
-NUM_ENV_STEPS_SAMPLED_PER_SECOND = "num_env_steps_sampled_per_second"
+NUM_ENV_STEPS_SAMPLED_PER_SECOND = "num_env_steps_sampled_per_second"  # Deprecated
 NUM_ENV_STEPS_SAMPLED_THIS_ITER = "num_env_steps_sampled_this_iter"  # @OldAPIStack
 NUM_ENV_STEPS_SAMPLED_FOR_EVALUATION_THIS_ITER = (
     "num_env_steps_sampled_for_evaluation_this_iter"
