
Commit 87a63f0

added more tensorboard logs in rl experiments ... (#81)
* added loss values for the DDPG policy
* added more tensorboard logs in rl experiments; adjusted stop conditions for faster testing
* formatted rl_envs.jl with JuliaFormatter
* bug fixes in rl_envs.jl:
  1. use a single @info call in each step
  2. added reward logging in some experiments
  3. increased the step count for BasicDQN on MountainCar, since 10000 steps is not enough for it
1 parent df55b88 commit 87a63f0
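For reference, the "use single @info in each step" fix in the message matters because TensorBoardLogger.jl advances its internal step counter on every logging event by default, so splitting a step's scalars across several @info calls puts them on different steps. A minimal sketch of the grouped-call pattern, assuming TensorBoardLogger.jl as the backend (the log directory and values below are illustrative, not taken from this commit):

using Logging
using TensorBoardLogger

lg = TBLogger("logs/example_run")  # hypothetical log directory

with_logger(lg) do
    # one logging event per environment step, so all scalars share the same step
    @info "training" reward = 0.0 loss = 0.0f0
end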

File tree: 3 files changed (+239, -99 lines)


src/algorithms/policy_gradient/ddpg.jl

Lines changed: 14 additions & 2 deletions
@@ -27,6 +27,9 @@ mutable struct DDPGPolicy{
     act_noise::Float64
     step::Int
     rng::R
+    # for logging
+    actor_loss::Float32
+    critic_loss::Float32
 end

 """
@@ -85,6 +88,7 @@ function DDPGPolicy(;
         act_noise,
         step,
         rng,
+        0.f0,0.f0,
     )
 end

@@ -133,13 +137,21 @@ function RLBase.update!(p::DDPGPolicy, traj::CircularCompactSARTSATrajectory)

     gs1 = gradient(Flux.params(C)) do
         q = C(vcat(s, a)) |> vec
-        mean((y .- q) .^ 2)
+        loss = mean((y .- q) .^ 2)
+        ignore() do
+            p.critic_loss = loss
+        end
+        loss
     end

     update!(C, gs1)

     gs2 = gradient(Flux.params(A)) do
-        -mean(C(vcat(s, A(s))))
+        loss = -mean(C(vcat(s, A(s))))
+        ignore() do
+            p.actor_loss = loss
+        end
+        loss
     end

     update!(A, gs2)
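For context, here is one way the new actor_loss/critic_loss fields might be surfaced to TensorBoard from an experiment hook. This is a hedged sketch, not part of the diff: DoEveryNStep and the agent.policy access path are assumptions about how the surrounding ReinforcementLearning.jl experiment wires the Agent, and the log directory is hypothetical.

using Logging
using ReinforcementLearning
using TensorBoardLogger

lg = TBLogger("logs/ddpg_run")  # hypothetical log directory

# DoEveryNStep invokes the function as f(t, agent, env); reading
# agent.policy.actor_loss assumes the Agent wraps a DDPGPolicy directly.
logging_hook = DoEveryNStep() do t, agent, env
    with_logger(lg) do
        @info "training" actor_loss = agent.policy.actor_loss critic_loss = agent.policy.critic_loss
    end
end

Such a hook would typically be passed as the last argument to run(agent, env, stop_condition, logging_hook).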
