@@ -87,7 +87,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
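Note: `log_step_increment` is the special keyword that TensorBoardLogger.jl picks out of a logged message to control how far the logger's global step advances. Passing `log_step_increment = 0` records the episode reward at the current step without bumping the counter, so the step axis keeps tracking environment steps rather than episodes. A minimal sketch of that behaviour, outside the experiment code above (the log directory and values are placeholders):

    using TensorBoardLogger, Logging

    lg = TBLogger("tensorboard_log/example")   # hypothetical log directory

    with_logger(lg) do
        @info "training" loss = 0.5                             # default increment: step advances by 1
        @info "training" reward = 10.0 log_step_increment = 0   # logged at the same step, counter unchanged
    end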
@@ -180,7 +181,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -274,7 +276,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -374,7 +377,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -476,7 +480,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -563,8 +568,14 @@ function RLCore.Experiment(
                     critic_loss = agent.policy.learner.critic_loss,
                     entropy_loss = agent.policy.learner.entropy_loss,
                     loss = agent.policy.learner.loss,
-                    reward = total_reward_per_episode.reward[end]
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
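In the batched experiments the hook receives a vectorized environment, so `total_reward_per_episode.rewards` is assumed to hold one reward history per sub-environment; the loop above logs the latest finished episode of the first sub-environment that reports terminal, again without advancing the step counter. A hedged sketch of that layout with stand-in data (no real environment or logger involved):

    # one reward history per sub-environment; rewards[i][end] is env i's latest episode return
    rewards = [Float64[12.0], Float64[], Float64[7.0]]
    push!(rewards[2], 21.0)                  # sub-env 2 just finished an episode

    terminals = [false, true, false]         # stand-in for get_terminal.(env)
    for i in 1:length(terminals)
        if terminals[i]
            @info "training" reward = rewards[i][end] log_step_increment = 0
            break                            # report only the first terminated sub-env this step
        end
    end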
@@ -656,8 +667,14 @@ function RLCore.Experiment(
                     critic_loss = agent.policy.learner.critic_loss,
                     entropy_loss = agent.policy.learner.entropy_loss,
                     loss = agent.policy.learner.loss,
-                    reward = total_reward_per_episode.reward[end],
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -764,7 +781,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -863,8 +881,14 @@ function RLCore.Experiment(
                     actor_loss = agent.policy.learner.actor_loss[end, end],
                     critic_loss = agent.policy.learner.critic_loss[end, end],
                     loss = agent.policy.learner.loss[end, end],
-                    reward = total_reward_per_episode.reward[end]
                 )
+                for i in 1:length(env)
+                    if get_terminal(env[i])
+                        @info "training" reward = total_reward_per_episode.rewards[i][end] log_step_increment =
+                            0
+                        break
+                    end
+                end
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -944,7 +968,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env
@@ -1038,7 +1063,8 @@ function RLCore.Experiment(
         end,
         DoEveryNEpisode() do t, agent, env
             with_logger(lg) do
-                @info "training" reward = total_reward_per_episode.rewards[end]
+                @info "training" reward = total_reward_per_episode.rewards[end] log_step_increment =
+                    0
             end
         end,
         DoEveryNStep(10000) do t, agent, env