Commit 61782d8

Merge branch 'development'

2 parents (50c1287 + a27bf95) · commit 61782d8

36 files changed: +804 −257 lines

README.md
Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@ AngoraPy is available on PyPI.
 pip install angorapy
 ```

-### MuJoCo and MuJoCo-Py
+### MuJoCo

 To train on any MuJoCo-based environment, you will need MuJoCo. As of late 2021, MuJoCo is free and can be [downloaded here](https://mujoco.org/download).
 As an interface to python, we use mujoco-py, [available here](https://github.com/openai/mujoco-py). To install both, follow their respective instructions.
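After following those instructions, the install can be sanity-checked from Python. The snippet below is a hedged sketch adapted from mujoco-py's own usage example; it assumes mujoco-py ≥ 2.1 (older releases return a tuple from `discover_mujoco`), and the bundled `humanoid.xml` path may differ between versions:

```python
import os

import mujoco_py  # fails here if MuJoCo or mujoco-py are not installed correctly

# locate the MuJoCo installation that mujoco-py discovered and load a bundled model
mj_path = mujoco_py.utils.discover_mujoco()
model = mujoco_py.load_model_from_path(os.path.join(mj_path, "model", "humanoid.xml"))

# step a simulation once to confirm the native bindings work
sim = mujoco_py.MjSim(model)
sim.step()
print(sim.data.qpos)  # joint positions of the humanoid after one step
```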

angorapy/agent/ppo_agent.py
Lines changed: 13 additions & 12 deletions

@@ -25,7 +25,7 @@
 from angorapy.agent.gather import Gatherer, evaluate, EpsilonGreedyGatherer
 from angorapy.agent.ppo.optim import learn_on_batch
 from angorapy.common import policies, const
-from angorapy.common.const import COLORS, BASE_SAVE_PATH, PRETRAINED_COMPONENTS_PATH, STORAGE_DIR
+from angorapy.common.const import COLORS, BASE_SAVE_PATH, PRETRAINED_COMPONENTS_PATH, STORAGE_DIR, PATH_TO_EXPERIMENTS
 from angorapy.common.const import MIN_STAT_EPS
 from angorapy.common.mpi_optim import MpiAdam
 from angorapy.common.policies import BasePolicyDistribution, CategoricalPolicyDistribution, GaussianPolicyDistribution

@@ -68,14 +68,6 @@


 class PPOAgent:
-    """Agent using the Proximal Policy Optimization Algorithm for learning.
-
-    The default is an implementation using two independent models for the critic and the actor. This is of course more
-    expensive than using shared parameters because we need two forward and backward calculations
-    per batch however this is what is used in the original paper and most implementations. During development this also
-    turned out to be beneficial for performance relative to episodes seen in easy tasks (e.g. CartPole) and crucial
-    to make any significant progress in more difficult environments such as LunarLander.
-    """
     policy: tf.keras.Model
     value: tf.keras.Model
     joint: tf.keras.Model

@@ -102,6 +94,14 @@ def __init__(self,
                 pretrained_components: list = None):
        """ Initialize the PPOAgent with given hyperparameters. Policy and value network will be freshly initialized.

+        Agent using the Proximal Policy Optimization Algorithm for learning.
+
+        The default is an implementation using two independent models for the critic and the actor. This is of course more
+        expensive than using shared parameters because we need two forward and backward calculations
+        per batch however this is what is used in the original paper and most implementations. During development this also
+        turned out to be beneficial for performance relative to episodes seen in easy tasks (e.g. CartPole) and crucial
+        to make any significant progress in more difficult environments such as LunarLander.
+
        Args:
            model_builder: a function creating a policy, value and joint model
            environment (gym.Env): the environment in which the agent will learn

@@ -219,6 +219,7 @@ def __init__(self,
         self.model_export_dir = "storage/saved_models/exports/"
         self.agent_id = mpi_comm.bcast(f"{round(time.time())}{random.randint(int(1e5), int(1e6) - 1)}", root=0)
         self.agent_directory = f"{BASE_SAVE_PATH}/{self.agent_id}/"
+        self.experiment_directory = f"{PATH_TO_EXPERIMENTS}/{self.agent_id}/"
         if _make_dirs:
             os.makedirs(self.model_export_dir, exist_ok=True)
             os.makedirs(self.agent_directory, exist_ok=True)

@@ -771,9 +772,9 @@ def report(self, total_iterations):
         current_lr = self.lr_schedule

         # losses
-        pi_loss = "-" if len(self.policy_loss_history) == 0 else f"{round(self.policy_loss_history[-1], 2):6.2f}"
-        v_loss = "-" if len(self.value_loss_history) == 0 else f"{round(self.value_loss_history[-1], 2):8.2f}"
-        ent = "-" if len(self.entropy_history) == 0 else f"{round(self.entropy_history[-1], 2):6.2f}"
+        pi_loss = " pi " if len(self.policy_loss_history) == 0 else f"{round(self.policy_loss_history[-1], 2):6.2f}"
+        v_loss = " v " if len(self.value_loss_history) == 0 else f"{round(self.value_loss_history[-1], 2):8.2f}"
+        ent = " ent " if len(self.entropy_history) == 0 else f"{round(self.entropy_history[-1], 2):6.2f}"

         # tbptt underflow
         underflow = f"w: {nc}{self.underflow_history[-1]}{ec}; " if self.underflow_history[-1] is not None else ""
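The docstring moved into `__init__` above describes the default setup: independent actor and critic networks rather than shared parameters. Below is a minimal sketch of that layout; it is not AngoraPy's actual `model_builder`, and the layer sizes, names, and discrete action head are illustrative assumptions only:

```python
import tensorflow as tf


def build_separate_actor_critic(obs_dim: int, n_actions: int):
    """Sketch of two independent networks, as the docstring describes (illustrative only)."""
    # policy (actor): observations -> action logits
    pi_in = tf.keras.Input(shape=(obs_dim,), name="observation")
    x = tf.keras.layers.Dense(64, activation="tanh")(pi_in)
    x = tf.keras.layers.Dense(64, activation="tanh")(x)
    logits = tf.keras.layers.Dense(n_actions, name="action_logits")(x)
    policy = tf.keras.Model(pi_in, logits, name="policy")

    # value (critic): a fully separate network; no shared layers, so every update
    # pays for a forward and backward pass through both models
    v_in = tf.keras.Input(shape=(obs_dim,), name="observation")
    y = tf.keras.layers.Dense(64, activation="tanh")(v_in)
    y = tf.keras.layers.Dense(64, activation="tanh")(y)
    state_value = tf.keras.layers.Dense(1, name="state_value")(y)
    value = tf.keras.Model(v_in, state_value, name="value")

    return policy, value
```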

angorapy/analysis/inspect_camera.py
Lines changed: 4 additions & 7 deletions

@@ -1,24 +1,21 @@
 import os
+
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

 import tensorflow as tf
-from environments import *

-from agent.ppo_agent import PPOAgent
-from common.wrappers import make_env
-from angorapy.models import get_model_builder
+from angorapy.common.wrappers import make_env

 import matplotlib.pyplot as plt

 tf.get_logger().setLevel('INFO')

-env = make_env("ReachAbsoluteVisual-v0")
-agent = PPOAgent(get_model_builder("shadow", "gru"), env, 1024, 8)
+env = make_env("HumanoidVisualManipulateBlock-v0")

 state = env.reset()

 for i in range(100):
     state, r, dd, info = env.step(env.action_space.sample())

 plt.imshow(state.vision / 255)
-plt.show()
+plt.show()
Lines changed: 111 additions & 0 deletions

@@ -0,0 +1,111 @@
+import json
+import os
+import re
+from json import JSONDecodeError
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+
+from angorapy.common.const import PATH_TO_EXPERIMENTS, BASE_SAVE_PATH
+
+matplotlib.use('TkAgg')
+
+group_names = ["benchmark-performance-pdl", "benchmark-performance-llc", "benchmark-performance-cp",
+               "benchmark-performance-ab"]
+titles = ["Pendulum", "LunarLanderContinuous", "CartPole", "Acrobot"]
+
+# group_names = [
+#     "benchmark-performance-ant",
+#     "benchmark-performance-walker2d",
+#     "benchmark-performance-swimmer",
+#     "benchmark-performance-reacher",
+#     "benchmark-performance-humanoidstandup",
+#     "benchmark-performance-humanoid",
+#     "benchmark-performance-hopper",
+#     "benchmark-performance-halfcheetah"
+# ]
+
+# group_names = [
+#     "benchmark-beta-reach",
+#     "benchmark-beta-freereach"
+# ]
+
+# titles = [n.split("-")[-1].capitalize() for n in group_names]
+
+exp_dir = "../../../" + PATH_TO_EXPERIMENTS
+experiment_paths = [os.path.join(exp_dir, p) for p in os.listdir(exp_dir)]
+
+environments = {}
+reward_thresholds = {}
+experiments_by_groups = {}
+envs_available = set()
+
+for exp_path in experiment_paths:
+
+    eid_m = re.match("[0-9]+", str(exp_path.split("/")[-1]))
+    if eid_m:
+        eid = eid_m.group(0)
+        model_path = os.path.join(BASE_SAVE_PATH, eid)
+
+        if os.path.isfile(os.path.join(exp_path, "progress.json")):
+            with open(os.path.join(exp_path, "progress.json"), "r") as f:
+                progress = json.load(f)
+
+            with open(os.path.join(exp_path, "meta.json"), "r") as f:
+                try:
+                    meta = json.load(f)
+                except JSONDecodeError as jserr:
+                    continue
+
+            exp_group = meta.get("experiment_group", "n/a")
+
+            if exp_group not in group_names:
+                continue
+
+            reward_threshold = None if meta["environment"]["reward_threshold"] == "None" else float(
+                meta["environment"]["reward_threshold"])
+
+            if not exp_group in experiments_by_groups.keys():
+                experiments_by_groups[exp_group] = {}
+                reward_thresholds[exp_group] = reward_threshold
+                environments[exp_group] = meta["environment"]["name"]
+
+            envs_available.add(meta["environment"]["name"])
+
+            experiments_by_groups[exp_group].update({
+                eid: progress
+            })
+
+n_rows, n_cols = 1, 4
+fig, axs = plt.subplots(n_rows, n_cols)
+fig.set_size_inches(16, 3 * n_rows)
+
+if not isinstance(axs[0], list):
+    axs = [axs]
+
+for i, name in enumerate(group_names):
+    data = experiments_by_groups[name]
+    reward_trajectories = list(map(lambda x: x["rewards"]["mean"], data.values()))
+    max_length = max([len(x) for x in reward_trajectories])
+    padded_reward_trajectories = list(map(lambda x: np.pad(x, (0, max_length - len(x)),
+                                                           mode="constant",
+                                                           constant_values=np.nan), reward_trajectories))
+    mean_reward = np.ma.mean(np.ma.array(padded_reward_trajectories, mask=np.isnan(padded_reward_trajectories)), axis=0)
+    std_reward = np.ma.std(np.ma.array(padded_reward_trajectories, mask=np.isnan(padded_reward_trajectories)), axis=0)
+
+    ax = axs[i // n_cols][i % n_cols]
+
+    ax.plot(mean_reward)
+    ax.fill_between(range(mean_reward.shape[0]), mean_reward - std_reward, mean_reward + std_reward, alpha=.2)
+
+    ax.set_xlim(0, mean_reward.shape[0] - 1)
+    ax.set_ylim(np.min(mean_reward - std_reward), np.max(mean_reward + std_reward))
+    ax.set_xlabel("Cycle")
+    ax.set_title(titles[i])
+
+    if i % n_cols == 0:
+        ax.set_ylabel("Episode Return")
+
+plt.savefig(f"../../../docs/figures/benchmarks/{'_'.join(titles)}_benchmark.pdf", format="pdf", bbox_inches='tight')
+plt.show()
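The script above averages reward trajectories of unequal length by NaN-padding each run to a common length and masking the padding before taking the mean and standard deviation. A small self-contained sketch of that technique, with toy values in place of real experiment data:

```python
import numpy as np

# toy reward trajectories of unequal length (placeholder values, not real results)
trajectories = [[1.0, 2.0, 3.0], [2.0, 4.0], [0.5, 1.5, 2.5, 3.5]]

max_length = max(len(t) for t in trajectories)

# pad every run with NaN up to the longest one ...
padded = np.array([np.pad(t, (0, max_length - len(t)),
                          mode="constant", constant_values=np.nan)
                   for t in trajectories])

# ... and mask the NaNs so shorter runs simply drop out of later cycles
masked = np.ma.array(padded, mask=np.isnan(padded))
mean_reward = masked.mean(axis=0)
std_reward = masked.std(axis=0)

print(mean_reward)  # per-cycle mean over however many runs reached that cycle
```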
Lines changed: 139 additions & 0 deletions

@@ -0,0 +1,139 @@
+import itertools
+import json
+import os
+import re
+from json import JSONDecodeError
+from typing import Iterable
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.axes import Axes
+
+from angorapy.common.const import PATH_TO_EXPERIMENTS, BASE_SAVE_PATH, QUALITATIVE_COLOR_PALETTE
+
+matplotlib.use('TkAgg')
+
+group_names = {
+    "gaussian": [
+        "benchmark-performance-ant",
+        "benchmark-performance-walker2d",
+        "benchmark-performance-swimmer",
+        "benchmark-gaussian-reach",
+        "benchmark-performance-reacher",
+        "benchmark-performance-hopper",
+        "benchmark-performance-halfcheetah",
+        "benchmark-gaussian-freereach"
+    ], "beta": [
+        "benchmark-beta-ant",
+        "benchmark-beta-walker2d",
+        "benchmark-beta-swimmer",
+        "benchmark-beta-reach",
+        "benchmark-beta-reacher",
+        "benchmark-beta-hopper",
+        "benchmark-beta-halfcheetah",
+        "benchmark-beta-freereach"
+    ]
+}
+
+titles = [n.split("-")[-1].capitalize() for n in group_names[list(group_names.keys())[0]]]
+
+# group_names = {"any": ["benchmark-performance-pdl", "benchmark-performance-llc", "benchmark-performance-cp",
+#                        "benchmark-performance-ab"]}
+# titles = ["Pendulum", "LunarLanderContinuous", "CartPole", "Acrobot"]
+
+exp_dir = "../../../" + PATH_TO_EXPERIMENTS
+experiment_paths = [os.path.join(exp_dir, p) for p in os.listdir(exp_dir)]
+
+environments = {category: {} for category in group_names.keys()}
+reward_thresholds = {category: {} for category in group_names.keys()}
+experiments_by_groups = {category: {} for category in group_names.keys()}
+envs_available = set()
+
+for exp_path in experiment_paths:
+
+    eid_m = re.match("[0-9]+", str(exp_path.split("/")[-1]))
+    if eid_m:
+        eid = eid_m.group(0)
+        model_path = os.path.join(BASE_SAVE_PATH, eid)
+
+        if os.path.isfile(os.path.join(exp_path, "progress.json")):
+            with open(os.path.join(exp_path, "progress.json"), "r") as f:
+                progress = json.load(f)
+
+            with open(os.path.join(exp_path, "meta.json"), "r") as f:
+                try:
+                    meta = json.load(f)
+                except JSONDecodeError as jserr:
+                    continue
+
+            exp_group = meta.get("experiment_group", "n/a")
+
+            if exp_group not in itertools.chain(*group_names.values()):
+                continue
+
+            reward_threshold = None if meta["environment"]["reward_threshold"] == "None" else float(
+                meta["environment"]["reward_threshold"])
+
+            for category in group_names.keys():
+                if exp_group in group_names[category] and exp_group not in experiments_by_groups[category].keys():
+                    experiments_by_groups[category][exp_group] = {}
+                    reward_thresholds[category][exp_group] = reward_threshold
+                    environments[category][exp_group] = meta["environment"]["name"]
+
+            envs_available.add(meta["environment"]["name"])
+
+            for category in group_names.keys():
+                if exp_group in group_names[category]:
+                    experiments_by_groups[category][exp_group].update({
+                        eid: progress
+                    })
+
+n_rows, n_cols = 2, 4
+fig, axs = plt.subplots(n_rows, n_cols)
+fig.set_size_inches(16, 4 * n_rows)
+
+if not isinstance(axs[0], Iterable):
+    axs = [axs]
+
+
+for i_cat, category in enumerate(group_names.keys()):
+    for i, name in enumerate(group_names[category]):
+        data = experiments_by_groups[category][name]
+        reward_trajectories = list(map(lambda x: x["rewards"]["mean"], data.values()))
+        max_length = max([len(x) for x in reward_trajectories])
+        padded_reward_trajectories = list(map(lambda x: np.pad(x, (0, max_length - len(x)),
+                                                               mode="constant",
+                                                               constant_values=np.nan), reward_trajectories))
+        mean_reward = np.ma.mean(np.ma.array(padded_reward_trajectories, mask=np.isnan(padded_reward_trajectories)),
+                                 axis=0)
+        std_reward = np.ma.std(np.ma.array(padded_reward_trajectories, mask=np.isnan(padded_reward_trajectories)),
+                               axis=0)
+
+        ax: Axes = axs[i // n_cols][i % n_cols]
+
+        if reward_thresholds[category][name] is not None:
+            ax.axhline(reward_thresholds[category][name], color=QUALITATIVE_COLOR_PALETTE[2], ls="--")
+        ax.plot(mean_reward, label=category, color=QUALITATIVE_COLOR_PALETTE[i_cat])
+        ax.fill_between(range(mean_reward.shape[0]), mean_reward - std_reward, mean_reward + std_reward, alpha=.2)
+
+        ax.set_xlim(0, mean_reward.shape[0] - 1)
+        ax.set_ylim(min(np.min(mean_reward - std_reward), ax.get_ylim()[0]),
+                    max(np.max(mean_reward + std_reward) * 1.1, ax.get_ylim()[1]))
+        ax.set_xlabel("Cycle")
+        ax.set_title(titles[i])
+
+        if titles[i] in ["Reach", "Freereach"]:
+            ax.set_title(titles[i], fontstyle="italic")
+
+        if i % n_cols == 0:
+            ax.set_ylabel("Episode Return")
+
+        if len(group_names.keys()) > 1:
+            ax.legend(loc="lower right")
+
+plt.subplots_adjust(top=0.8, bottom=0.2, hspace=0.35, wspace=0.2)
+
+plt.savefig(f"../../../docs/figures/benchmarks/{'_'.join(titles)}_benchmark_comparison.pdf", format="pdf",
+            bbox_inches='tight')
+plt.show()

angorapy/analysis/plotting/plot_experiment_comparison.py
Lines changed: 13 additions & 9 deletions

@@ -3,9 +3,9 @@

 from matplotlib import pyplot as plt

-from common.const import PATH_TO_EXPERIMENTS
+from angorapy.common.const import PATH_TO_EXPERIMENTS, QUALITATIVE_COLOR_PALETTE

-experiment_ids = [ 1626374994 ]
+experiment_ids = ['1653053413', '1655284851', '1654708464']

 reward_developments = {}
 for id in experiment_ids:

@@ -14,16 +14,20 @@
     with open(os.path.join("../../../", PATH_TO_EXPERIMENTS, str(id), "progress.json")) as f:
         progress = json.load(f)

-    exp_name = meta["hyperparameters"]["model"].upper()
+    exp_name = meta["hyperparameters"]["distribution"]
     reward_developments[exp_name] = progress["rewards"]["mean"]

-for name, rewards in reward_developments.items():
-    plt.plot(rewards, label=name)
+for i, (name, rewards) in enumerate(reward_developments.items()):
+    plt.plot(rewards[:800], label=name, color=QUALITATIVE_COLOR_PALETTE[i])

+plt.title("In-Hand Object Manipulation")
 plt.xlabel("Cycle")
-plt.ylabel("Reward")
-# plt.legend()
+plt.ylabel("Avg. Episode Return")
+plt.legend()

-plt.gcf().set_size_inches(8, 4)
+plt.xlim(0, 800)
+plt.ylim(0)
+
+plt.gcf().set_size_inches(16, 4)
 # plt.show()
-plt.savefig("manipulate-progress.pdf", format="pdf", bbox_inches="tight")
+plt.savefig("../../../docs/figures/manipulate-progress.pdf", format="pdf", bbox_inches="tight")
