
Commit b7bb7a0

Fix (#85)
* Fixed a bug in reward logging: a multithreaded env does not reach POST_EPISODE_STAGE (see ReinforcementLearningCore\src\core\run.jl, line 46), so the per-episode reward hook never fired.
* Fixed a logging error: @info was called multiple times per step without log_step_increment = 0.
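
The log_step_increment fix relates to how the TensorBoard-backed logger counts steps: with TensorBoardLogger.jl (which the lg logger here appears to be), every logging message advances the global step by 1 by default, so a second @info call for the same training step would otherwise shift every later point on the x-axis. Below is a minimal sketch of the pattern, assuming TensorBoardLogger.jl and using placeholder metric values; it is for illustration only and is not code from this commit.

using TensorBoardLogger, Logging

lg = TBLogger("tb_log", min_level = Logging.Info)  # assumed logger backend

with_logger(lg) do
    for step in 1:100
        loss = 1 / step        # placeholder values for the sketch
        reward = float(step)
        # The first message advances the TensorBoard step by 1 (the default).
        @info "training" loss = loss
        # Further messages for the same step must not advance it again.
        @info "training" reward = reward log_step_increment = 0
    end
end
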
1 parent d21b82d commit b7bb7a0

File tree

1 file changed: +37 additions, -11 deletions

src/experiments/rl_envs.jl

Lines changed: 37 additions & 11 deletions
@@ -87,7 +87,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -180,7 +181,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -274,7 +276,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -374,7 +377,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -476,7 +480,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -563,8 +568,14 @@ function RLCore.Experiment(
                     critic_loss = agent.policy.learner.critic_loss,
                     entropy_loss = agent.policy.learner.entropy_loss,
                     loss = agent.policy.learner.loss,
-                    reward = total_reward_per_episode.reward[end]
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -656,8 +667,14 @@ function RLCore.Experiment(
                     critic_loss = agent.policy.learner.critic_loss,
                     entropy_loss = agent.policy.learner.entropy_loss,
                     loss = agent.policy.learner.loss,
-                    reward = total_reward_per_episode.reward[end],
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -764,7 +781,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -863,8 +881,14 @@ function RLCore.Experiment(
                     actor_loss = agent.policy.learner.actor_loss[end, end],
                     critic_loss = agent.policy.learner.critic_loss[end, end],
                     loss = agent.policy.learner.loss[end, end],
-                    reward = total_reward_per_episode.reward[end]
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -944,7 +968,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -1038,7 +1063,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
