Skip to content

Commit 48cd557

Browse files
committed
define environment docs
1 parent ca55aca commit 48cd557

File tree

19 files changed

+274
-48
lines changed

19 files changed

+274
-48
lines changed

docs/make.jl

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
push!(LOAD_PATH, "../src/")
22

3-
using Documenter, Dojo
3+
using Documenter#, Dojo
44

55
makedocs(
66
modules = [Dojo],
@@ -22,23 +22,15 @@ makedocs(
2222
"define_controller.md",
2323
],
2424

25-
"Creating Environments" => [
26-
"define_environment.md",
25+
"Environments" => [
2726
"load_environment.md",
27+
"define_environment.md",
2828
],
2929

3030
"Gradients from Simulator" => [
3131
"gradients.md",
3232
],
3333

34-
"Environments" => [
35-
"atlas_env.md",
36-
"quadruped_env.md",
37-
"rexhopper_env.md",
38-
"classic_env.md",
39-
"gym_env.md",
40-
],
41-
4234
"Examples" => [
4335
"simulation.md",
4436
"trajectory_optimization.md",

docs/src/define_environment.md

Lines changed: 217 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,217 @@
1-
# Defining Environment
1+
# Defining an Environment
2+
3+
An [`Environment`](@ref) is a convenient object for applications like reinforcement learning and trajectory optimization.
4+
5+
To demonstrate, we create the [`Dojo.Ant`](@ref) environment. First, we load (or [create](define_mechanism.md)) a mechanism:
6+
7+
```julia
8+
mechanism = get_mechanism(:ant)
9+
```
10+
11+
Next, we create the environment's other attributes.
12+
13+
Dimensions:
14+
```julia
15+
# set state dimension based on representation
16+
if representation == :minimal
17+
nx = minimal_dimension(mechanism)
18+
elseif representation == :maximal
19+
nx = maximal_dimension(mechanism)
20+
end
21+
# set control dimension
22+
nu = 8
23+
# set observation dimension
24+
no = nx
25+
```
26+
27+
Space (for limiting controls and observations):
28+
```julia
29+
# limit controls to [-1.0, 1.0]
30+
aspace = BoxSpace(nu,
31+
low=(-ones(nu)),
32+
high=(ones(nu)))
33+
# no limits on observations
34+
ospace = BoxSpace(no,
35+
low=(-Inf * ones(no)),
36+
high=(Inf * ones(no)))
37+
```
38+
39+
Random number generator:
40+
```julia
41+
rng = MersenneTwister(seed)
42+
```
43+
44+
Dynamics data:
45+
```julia
46+
# state vector
47+
z = get_maximal_state(mechanism)
48+
x = representation == :minimal ? maximal_to_minimal(mechanism, z) : z
49+
# dynamics Jacobians
50+
fx = zeros(nx, nx)
51+
fu = zeros(nx, nu)
52+
```
53+
54+
Control data:
55+
```julia
56+
# control vector (previous)
u_prev = zeros(nu)
# control map transforms inputs from control to dynamics space;
# the mask is sized from `nu` so it stays consistent with `I(nu)` and the scaling
# NOTE(review): presumably the 6 leading zero columns are unactuated coordinates — confirm
control_mask = [zeros(nu, 6) I(nu)]
control_scaling = Diagonal(timestep * 150.0 * ones(nu))
control_map = control_mask' * control_scaling
62+
```
63+
64+
Visuals:
65+
```julia
66+
# create a visualizer
67+
vis = Visualizer()
68+
```
69+
70+
Solver options:
71+
```julia
72+
# simulation options
73+
opts_step = SolverOptions()
74+
# gradient options
75+
opts_grad = SolverOptions()
76+
```
77+
78+
Environment:
79+
```julia
80+
# type parameters for the environment, taken from the objects created above
TYPES = [Ant, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
env = Environment{TYPES...}(
    mechanism,
    representation,
    aspace, ospace,
    x, fx, fu,
    u_prev,
    control_map,
    nx, nu, no,
    info,
    [rng],
    vis,
    # simulation and gradient solver options (`opts_step` is read by `step`)
    opts_step, opts_grad)
93+
```
94+
95+
With the environment instantiated, we can interact with it by overloading the following methods:
96+
97+
Simulate environment forward one time step:
98+
```julia
99+
"""
    step(env::Environment{Ant}, x, u; diff=false)

Simulate the environment forward one time step from state `x` with control `u`.

The control is cached in `env.input_previous` and mapped through `env.control_map`
before being applied. The new state is written to `env.state` in the environment's
representation. When `diff` is `true`, the dynamics Jacobians are written to
`env.dynamics_jacobian_state` and `env.dynamics_jacobian_input`.

Returns the tuple `(observation, reward, done, info)`.
"""
function step(env::Environment{Ant}, x, u;
    diff=false)

    # mechanism
    mechanism = env.mechanism

    # copy current state
    x0 = copy(x)

    # cache current control
    env.input_previous .= u  # for rendering in Gym
    u_scaled = env.control_map * u

    # representation conversion (the simulator steps in maximal coordinates)
    z0 = env.representation == :minimal ? minimal_to_maximal(mechanism, x0) : x0

    # simulate one step
    z1 = step!(mechanism, z0, u_scaled; opts=env.opts_step)

    # representation conversion
    env.state .= env.representation == :minimal ? maximal_to_minimal(mechanism, z1) : z1

    # cost/reward
    reward = cost(env, z1, u_scaled)

    # check for done (matches the two-argument `is_done(env, x)` method)
    done = is_done(env, z1)

    # gradients
    if diff
        if env.representation == :minimal
            fx, fu = get_minimal_gradients!(env.mechanism, z0, u_scaled, opts=env.opts_grad)
        elseif env.representation == :maximal
            fx, fu = get_maximal_gradients!(env.mechanism, z0, u_scaled, opts=env.opts_grad)
        end
        env.dynamics_jacobian_state .= fx
        # chain rule through the control map: gradient with respect to the unscaled input
        env.dynamics_jacobian_input .= fu * env.control_map
    end

    # information
    info = Dict()

    return get_observation(env), reward, done, info
end
146+
```
147+
148+
Return environment to nominal state:
149+
```julia
150+
"""
    reset(env::Environment{Ant}; x=nothing)

Return the environment to a nominal state.

The mechanism is re-initialized first. If `x` is provided, it is copied into
`env.state` and nothing else is changed. Otherwise the state is read back from the
mechanism, stored in the environment's representation, and `env.input_previous`
is zeroed.

Returns `get_observation(env)`.
"""
function reset(env::Environment{Ant};
    x=nothing)

    # initialize
    initialize!(env.mechanism, type2symbol(Ant))

    # identity comparison is the idiomatic (and dispatch-safe) test against `nothing`
    if x !== nothing
        env.state .= x
    else
        x = get_minimal_state(env.mechanism)
        if env.representation == :minimal
            set_maximal_state!(env.mechanism, minimal_to_maximal(env.mechanism, x))
            env.state .= x
        elseif env.representation == :maximal
            z = minimal_to_maximal(env.mechanism, x)
            set_maximal_state!(env.mechanism, z)
            env.state .= z
        end
        env.input_previous .= 0.0
    end

    return get_observation(env)
end
173+
```
174+
175+
Observation for current environment state:
176+
```julia
177+
"""
    get_observation(env::Environment{Ant})

Return the observation for the current environment state: `env.state` followed by
one contact-force entry per contact, each limited to `[-1.0, 1.0]`.
"""
function get_observation(env::Environment{Ant})
    # one clipped impulse component per contact in the mechanism
    clipped = Float64[
        max(-1.0, min(1.0, c.impulses[2][1])) for c in env.mechanism.contacts
    ]
    # include contact forces with state for observation
    return vcat(env.state, clipped)
end
185+
```
186+
187+
Cost/reward associated with simulation step:
188+
```julia
189+
"""
    cost(env::Environment{Ant}, x, u)

Return the reward associated with a simulation step.

The reward is `forward_reward - ctrl_cost - contact_cost + survive_reward`, where
the forward reward is twice `x[4]`, the control cost is `0.05 * u' * u`, and the
contact cost sums the squared, clipped contact impulses of `env.mechanism`.
"""
function cost(env::Environment{Ant}, x, u)
    # forward reward
    v = x[4] # x-direction velocity
    forward_reward = 2.0 * v

    # control cost
    ctrl_cost = (0.05 * u' * u)[1]

    # contact cost (contacts are read from the environment's mechanism)
    contact_cost = 0.0

    for contact in env.mechanism.contacts
        contact_cost += 0.5 * 1.0e-3 * max(-1.0, min(1.0, contact.impulses[2][1]))^2.0
    end

    # survive reward
    survive_reward = 0.05

    # total reward
    reward = forward_reward - ctrl_cost - contact_cost + survive_reward
    return reward
end
210+
```
211+
212+
Determine if simulation should terminate:
213+
```julia
214+
"""
    is_done(env::Environment{Ant}, x)

Return `true` when the simulation should terminate: `env.state` contains a
non-finite entry, or `env.state[3]` lies outside `[0.2, 1.0]`.

The check reads `env.state`; the argument `x` is not used.
"""
function is_done(env::Environment{Ant}, x)
    state = env.state
    # any non-finite entry terminates immediately
    all(isfinite, state) || return true
    in_range = 0.2 <= state[3] <= 1.0
    return !in_range
end
217+
```

docs/src/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ __[Dojo](https://github.com/dojo-sim/Dojo.jl) is a differentiable simulator for
55
## Features
66
* __Maximal-Coordinates Representation__: Fast and efficient conversion between [maximal](maximal_representation.md) and [minimal](minimal_representation.md) representations
77
* __Smooth Gradients__: Simulation with [hard contact](impact.md) and useful [gradients](gradients.md) through contact events
8-
* __Open Source__: Code is available on [GitHub](https://github.com/dojo-sim/Dojo.jl) and distributed under the MIT Licence.
8+
* __Open Source__: Code is available on [GitHub](https://github.com/dojo-sim/Dojo.jl) and distributed under the MIT Licence
99
* __Python Interface__: [dojopy](https://github.com/dojo-sim/dojopy)
1010

1111
## Installation
@@ -41,4 +41,4 @@ If this project is useful for your work please consider
4141
* Leaving a star on the [GitHub repository](https://github.com/dojo-sim/Dojo.jl)
4242

4343
## Licence
44-
Dojo.jl is licensed under the MIT License For more details click [here](https://github.com/dojo-sim/Dojo.jl/blob/main/LICENSE.md).
44+
Dojo.jl is licensed under the MIT License. For more details click [here](https://github.com/dojo-sim/Dojo.jl/blob/main/LICENSE.md).

docs/src/load_environment.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,20 @@
1-
# Load Existing Environment
1+
# Load Existing Environment
2+
3+
Dojo includes a number of existing environments:
4+
5+
- [`Dojo.Ant`](@ref)
6+
- [`Dojo.Atlas`](@ref)
7+
- [`Dojo.Cartpole`](@ref)
8+
- [`Dojo.HalfCheetah`](@ref)
9+
- [`Dojo.Hopper`](@ref)
10+
- [`Dojo.Pendulum`](@ref)
11+
- [`Dojo.Quadruped`](@ref)
12+
- [`Dojo.RaibertHopper`](@ref)
13+
- [`Dojo.RexHopper`](@ref)
14+
- [`Dojo.Walker`](@ref)
15+
16+
Specific environments can be instantiated, for example [`Dojo.Atlas`](@ref):
17+
18+
```julia
19+
env = get_environment(:atlas)
20+
```

environments/ant/methods/env.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ function ant(;
6666
TYPES = [Ant, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
6767
env = Environment{TYPES...}(mechanism, representation, aspace, ospace,
6868
x, fx, fu,
69-
u_prev, control_mask, control_scaling,
69+
u_prev,
70+
control_mask' * control_scaling,
7071
nx, nu, no,
7172
info,
7273
[rng], vis,
@@ -85,7 +86,7 @@ function step(env::Environment{Ant}, x, u;
8586

8687
x0 = copy(x)
8788
env.input_previous .= u # for rendering in Gym
88-
u_scaled = env.control_mask' * env.control_scaling * u
89+
u_scaled = env.control_map * u
8990

9091
z0 = env.representation == :minimal ? minimal_to_maximal(mechanism, x0) : x0
9192
z1 = step!(mechanism, z0, u_scaled; opts=env.opts_step)
@@ -124,7 +125,7 @@ function step(env::Environment{Ant}, x, u;
124125
fx, fu = get_maximal_gradients!(env.mechanism, z0, u_scaled, opts=env.opts_grad)
125126
end
126127
env.dynamics_jacobian_state .= fx
127-
env.dynamics_jacobian_input .= fu * env.control_mask' * env.control_scaling
128+
env.dynamics_jacobian_input .= fu * env.control_map
128129
end
129130

130131
info = Dict()

environments/atlas/methods/env.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function atlas(;
6666
TYPES = [Atlas, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
6767
Environment{TYPES...}(mechanism, representation, aspace, ospace,
6868
x, fx, fu,
69-
u_prev, control_mask, control_scaling,
69+
u_prev, control_mask' * control_scaling,
7070
nx, nu, no,
7171
info,
7272
[rng], vis,

environments/box/methods/env.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ function block(;
6464
TYPES = [Block, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
6565
env = Environment{TYPES...}(mechanism, representation, aspace, ospace,
6666
x, fx, fu,
67-
u_prev, control_mask, control_scaling,
67+
u_prev, control_mask' * control_scaling,
6868
nx, nu, no,
6969
info,
7070
[rng], vis,

environments/box2d/methods/env.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ function box2d(;
5656
TYPES = [Quadruped, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
5757
Environment{TYPES...}(mechanism, representation, aspace, ospace,
5858
x, fx, fu,
59-
u_prev, control_mask, control_scaling,
59+
u_prev, control_mask' * control_scaling,
6060
nx, nu, no,
6161
info,
6262
[rng], vis,

environments/cartpole/methods/env.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ function cartpole(;
5454
TYPES = [Cartpole, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
5555
env = Environment{TYPES...}(mechanism, representation, aspace, ospace,
5656
x, fx, fu,
57-
u_prev, control_mask, control_scaling,
57+
u_prev, control_mask' * control_scaling,
5858
nx, nu, no,
5959
info,
6060
[rng], vis,

0 commit comments

Comments
 (0)