
Commit 6409d3a

Format .jl files (#70)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

1 parent: e3f9375

7 files changed: +55 -39 lines

src/algorithms/dqns/common.jl

Lines changed: 2 additions & 9 deletions
@@ -91,15 +91,8 @@ function RLBase.update!(p::QBasedPolicy{<:PERLearners}, t::AbstractTrajectory)
     end
 end
 
-function (agent::Agent{<:QBasedPolicy{<:PERLearners}})(
-    ::RLCore.Training{PostActStage},
-    env,
-)
-    push!(
-        agent.trajectory;
-        reward = get_reward(env),
-        terminal = get_terminal(env),
-    )
+function (agent::Agent{<:QBasedPolicy{<:PERLearners}})(::RLCore.Training{PostActStage}, env)
+    push!(agent.trajectory; reward = get_reward(env), terminal = get_terminal(env))
     if haskey(agent.trajectory, :priority)
         push!(agent.trajectory; priority = agent.policy.learner.default_priority)
     end

src/algorithms/dqns/dqn.jl

Lines changed: 13 additions & 8 deletions
@@ -88,13 +88,16 @@ end
 if `!isnothing(stack_size)`.
 """
 function (learner::DQNLearner)(env)
-    probs = env |>
-            get_state |>
-            x -> Flux.unsqueeze(x, ndims(x) + 1) |>
-            x -> send_to_device(device(learner.approximator), x) |>
-            learner.approximator |>
-            vec |>
-            send_to_host
+    probs =
+        env |>
+        get_state |>
+        x ->
+            Flux.unsqueeze(x, ndims(x) + 1) |>
+            x ->
+                send_to_device(device(learner.approximator), x) |>
+                learner.approximator |>
+                vec |>
+                send_to_host
 
     if ActionStyle(env) === FULL_ACTION_SET
         probs .+= typemin(eltype(probs)) .* (1 .- get_legal_actions_mask(env))
@@ -130,7 +133,9 @@ function RLBase.update!(learner::DQNLearner, t::AbstractTrajectory)
 
     target_q = Qₜ(next_states)
     if haskey(t, :next_legal_actions_mask)
-        target_q .+= typemin(eltype(target_q)) .* (1 .- send_to_device(D, t[:next_legal_actions_mask]))
+        target_q .+=
+            typemin(eltype(target_q)) .*
+            (1 .- send_to_device(D, t[:next_legal_actions_mask]))
     end
 
     q′ = dropdims(maximum(target_q; dims = 1), dims = 1)
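Note on the reformatted `probs` pipeline: the nested indentation the formatter introduces is not arbitrary. In Julia, the anonymous-function arrow `->` parses with lower precedence than `|>`, so a lambda placed in the middle of a pipe chain takes the remainder of the chain as its body, and the new indentation mirrors that parse. A minimal standalone sketch with made-up values (not part of the commit; the chain's result is the same under either reading, the point is only how it groups):

    # `->` binds more loosely than `|>`, so the lambda body below is
    # `x + 1 |> abs |> string`, not just `x + 1`.
    result =
        [1, -5] |>
        sum |>
        x ->
            x + 1 |>
            abs |>
            string

    @assert result == "3"   # sum gives -4, the lambda then yields string(abs(-3))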

src/algorithms/dqns/iqn.jl

Lines changed: 5 additions & 2 deletions
@@ -187,7 +187,9 @@ function RLBase.update!(learner::IQNLearner, batch::NamedTuple)
     avg_zₜ = mean(zₜ, dims = 2)
 
     if !isnothing(batch.next_legal_actions_mask)
-        avg_zₜ .+= typemin(eltype(avg_zₜ)) .* (1 .- send_to_device(D, batch.next_legal_actions_mask))
+        avg_zₜ .+=
+            typemin(eltype(avg_zₜ)) .*
+            (1 .- send_to_device(D, batch.next_legal_actions_mask))
     end
 
     aₜ = argmax(avg_zₜ, dims = 1)
@@ -224,7 +226,8 @@ function RLBase.update!(learner::IQNLearner, batch::NamedTuple)
         huber_loss ./ κ
     loss_per_quantile = reshape(sum(raw_loss; dims = 1), N, batch_size)
     loss_per_element = mean(loss_per_quantile; dims = 1) # use as priorities
-    loss = is_use_PER ? dot(vec(weights), vec(loss_per_element)) * 1 // batch_size : mean(loss_per_element)
+    loss = is_use_PER ? dot(vec(weights), vec(loss_per_element)) * 1 // batch_size :
+           mean(loss_per_element)
     ignore() do
         # @assert all(loss_per_element .>= 0)
         is_use_PER && (
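Note on the reformatted ternary: with prioritized experience replay enabled, the loss is the importance-sampling-weighted average of the per-element losses (a dot product divided by the batch size); otherwise it falls back to a plain mean. A hedged standalone sketch with hypothetical numbers (not the library code):

    using LinearAlgebra: dot
    using Statistics: mean

    loss_per_element = Float32[0.5, 1.0, 0.25, 0.25]   # hypothetical per-sample losses
    weights          = Float32[1.0, 0.5, 1.0, 1.0]     # hypothetical importance-sampling weights
    batch_size       = length(loss_per_element)

    per_loss   = dot(vec(weights), vec(loss_per_element)) * 1 // batch_size
    plain_loss = mean(loss_per_element)

    @assert per_loss ≈ 0.375f0   # (0.5 + 0.5 + 0.25 + 0.25) / 4
    @assert plain_loss ≈ 0.5f0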

src/algorithms/dqns/prioritized_dqn.jl

Lines changed: 13 additions & 8 deletions
@@ -102,13 +102,16 @@ end
 if `!isnothing(stack_size)`.
 """
 function (learner::PrioritizedDQNLearner)(env)
-    probs = env |>
-            get_state |>
-            x -> Flux.unsqueeze(x, ndims(x) + 1) |>
-            x -> send_to_device(device(learner.approximator), x) |>
-            learner.approximator |>
-            vec |>
-            send_to_host
+    probs =
+        env |>
+        get_state |>
+        x ->
+            Flux.unsqueeze(x, ndims(x) + 1) |>
+            x ->
+                send_to_device(device(learner.approximator), x) |>
+                learner.approximator |>
+                vec |>
+                send_to_host
 
     if ActionStyle(env) === FULL_ACTION_SET
         probs .+= typemin(eltype(probs)) .* (1 .- get_legal_actions_mask(env))
@@ -138,7 +141,9 @@ function RLBase.update!(learner::PrioritizedDQNLearner, batch::NamedTuple)
 
     target_q = Qₜ(next_states)
    if !isnothing(batch.next_legal_actions_mask)
-        target_q .+= typemin(eltype(target_q)) .* (1 .- send_to_device(D, batch.next_legal_actions_mask))
+        target_q .+=
+            typemin(eltype(target_q)) .*
+            (1 .- send_to_device(D, batch.next_legal_actions_mask))
     end
 
     q′ = dropdims(maximum(target_q; dims = 1), dims = 1)
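The masking hunks in this file (and in dqn.jl, iqn.jl, and rainbow.jl) all reformat the same idea: push the values of illegal actions toward the element type's minimum so the following maximum/argmax never selects them. A simplified sketch of that idea, using `ifelse` instead of the library's broadcasted add and hypothetical values (an illustrative variant, not the commit's code):

    q    = Float32[0.3, 1.2, -0.5, 0.9]       # hypothetical Q values for 4 actions
    mask = Bool[true, false, true, false]     # legal-actions mask: only actions 1 and 3 are legal

    masked_q = ifelse.(mask, q, typemin(Float32))   # illegal entries become -Inf32

    @assert argmax(masked_q) == 1     # 1.2 (action 2) is illegal, so action 1 wins
    @assert maximum(masked_q) ≈ 0.3f0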

src/algorithms/dqns/rainbow.jl

Lines changed: 5 additions & 2 deletions
@@ -161,7 +161,9 @@ function RLBase.update!(learner::RainbowLearner, batch::NamedTuple)
     next_probs = reshape(softmax(reshape(next_logits, n_atoms, :)), n_atoms, n_actions, :)
     next_q = reshape(sum(support .* next_probs, dims = 1), n_actions, :)
     if !isnothing(batch.next_legal_actions_mask)
-        next_q .+= typemin(eltype(next_q)) .* (1 .- send_to_device(D, batch.next_legal_actions_mask))
+        next_q .+=
+            typemin(eltype(next_q)) .*
+            (1 .- send_to_device(D, batch.next_legal_actions_mask))
     end
     next_prob_select = select_best_probs(next_probs, next_q)
 
@@ -186,7 +188,8 @@ function RLBase.update!(learner::RainbowLearner, batch::NamedTuple)
     logits = reshape(Q(states), n_atoms, n_actions, :)
     select_logits = logits[:, actions]
     batch_losses = loss_func(select_logits, target_distribution)
-    loss = is_use_PER ? dot(vec(weights), vec(batch_losses)) * 1 // batch_size : mean(batch_losses)
+    loss = is_use_PER ? dot(vec(weights), vec(batch_losses)) * 1 // batch_size :
+           mean(batch_losses)
     ignore() do
         if is_use_PER
             updated_priorities .= send_to_host(vec((batch_losses .+ 1f-10) .^ β))

src/algorithms/policy_gradient/A2C.jl

Lines changed: 8 additions & 5 deletions
@@ -28,10 +28,11 @@ Base.@kwdef mutable struct A2CLearner{A<:ActorCritic} <: AbstractLearner
 end
 
 function (learner::A2CLearner)(env::MultiThreadEnv)
-    logits = learner.approximator.actor(send_to_device(
-        device(learner.approximator),
-        get_state(env),
-    )) |> send_to_host
+    logits =
+        learner.approximator.actor(send_to_device(
+            device(learner.approximator),
+            get_state(env),
+        )) |> send_to_host
 
     if ActionStyle(env[1]) === FULL_ACTION_SET
         logits .+= typemin(eltype(logits)) .* (1 .- get_legal_actions_mask(env))
@@ -87,7 +88,9 @@ function RLBase.update!(learner::A2CLearner, t::AbstractTrajectory)
     gs = gradient(ps) do
         logits = AC.actor(states_flattened)
         if haskey(t, :legal_actions_mask)
-            logits .+= typemin(eltype(logits)) .* (1 .- flatten_batch(send_to_device(D, t[:legal_actions_mask])))
+            logits .+=
+                typemin(eltype(logits)) .*
+                (1 .- flatten_batch(send_to_device(D, t[:legal_actions_mask])))
         end
         probs = softmax(logits)
         log_probs = logsoftmax(logits)

src/algorithms/policy_gradient/ppo.jl

Lines changed: 9 additions & 5 deletions
@@ -75,10 +75,11 @@ function PPOLearner(;
 end
 
 function (learner::PPOLearner)(env::MultiThreadEnv)
-    logits = learner.approximator.actor(send_to_device(
-        device(learner.approximator),
-        get_state(env),
-    )) |> send_to_host
+    logits =
+        learner.approximator.actor(send_to_device(
+            device(learner.approximator),
+            get_state(env),
+        )) |> send_to_host
 
     if ActionStyle(env[1]) === FULL_ACTION_SET
         logits .+= typemin(eltype(logits)) .* (1 .- get_legal_actions_mask(env))
@@ -139,7 +140,10 @@ function RLBase.update!(learner::PPOLearner, t::PPOTrajectory)
         inds = rand_inds[(i-1)*microbatch_size+1:i*microbatch_size]
         s = send_to_device(D, select_last_dim(states_flatten, inds))
         if haskey(t, :legal_actions_mask)
-            lam = send_to_device(D, select_last_dim(flatten_batch(t[:legal_actions_mask]), inds))
+            lam = send_to_device(
+                D,
+                select_last_dim(flatten_batch(t[:legal_actions_mask]), inds),
+            )
         end
         a = vec(actions)[inds]
         r = send_to_device(D, vec(returns)[inds])
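For context, the second hunk sits inside PPO's microbatch loop: shuffled sample indices are consumed in fixed-size chunks of `microbatch_size`, and each chunk selects the matching states, actions, and returns for one gradient step. A small self-contained sketch of just that index-slicing pattern, with hypothetical sizes (not the library code):

    using Random: shuffle

    n_samples       = 12
    microbatch_size = 4
    rand_inds       = shuffle(collect(1:n_samples))

    for i in 1:(n_samples ÷ microbatch_size)
        inds = rand_inds[(i-1)*microbatch_size+1:i*microbatch_size]
        @assert length(inds) == microbatch_size
        # ... slice the trajectory with `inds` and take one gradient step here
    end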
