
Commit 07b7fc2

Merge branch 'master' of github.com:datawhalechina/easy-rl
2 parents 5315819 + bab7f6f commit 07b7fc2

66 files changed: +247 additions, -841 deletions
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+------------------ start ------------------
+algo_name : A2C
+env_name : CartPole-v0
+n_envs : 8
+max_steps : 30000
+n_steps : 5
+gamma : 0.99
+lr : 0.001
+hidden_dim : 256
+result_path : c:\Users\24438\Desktop\rl-tutorials\codes\A2C/outputs/CartPole-v0/20220713-221850/results/
+model_path : c:\Users\24438\Desktop\rl-tutorials\codes\A2C/outputs/CartPole-v0/20220713-221850/models/
+save_fig : True
+device : cuda
+------------------- end -------------------
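The hyperparameter log above is what the new save_args helper writes at the start of a run: the task scripts in this commit import it from common.utils and call save_args(cfg) after make_dir. The helper's own body is not part of this diff, so the following is only a minimal sketch, assuming it simply dumps the parsed argparse namespace to a text file under result_path:

import os
from argparse import Namespace

def save_args(args: Namespace) -> None:
    # Sketch only (assumption): write every hyperparameter in the namespace to
    # <result_path>/params.txt using the start/end-delimited format shown above.
    os.makedirs(args.result_path, exist_ok=True)
    with open(os.path.join(args.result_path, "params.txt"), "w") as f:
        f.write("------------------ start ------------------\n")
        for k, v in vars(args).items():  # Namespace -> parameter name/value pairs
            f.write(f"{k} : {v}\n")
        f.write("------------------- end -------------------\n")

The file name params.txt is illustrative; only the format of the log is taken from the output shown above.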

codes/A2C/task0.py

Lines changed: 45 additions & 46 deletions
@@ -1,45 +1,43 @@
-import sys
-import os
-curr_path = os.path.dirname(os.path.abspath(__file__)) # absolute path of this file
-parent_path = os.path.dirname(curr_path) # parent directory
-sys.path.append(parent_path) # add path to sys.path
+import sys,os
+curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
+parent_path = os.path.dirname(curr_path) # parent path
+sys.path.append(parent_path) # add to system path
 
 import gym
 import numpy as np
 import torch
 import torch.optim as optim
 import datetime
+import argparse
 from common.multiprocessing_env import SubprocVecEnv
 from a2c import ActorCritic
 from common.utils import save_results, make_dir
-from common.utils import plot_rewards
+from common.utils import plot_rewards, save_args
 
-curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
-algo_name = 'A2C' # name of algorithm
-env_name = 'CartPole-v0' # name of environment
 
-class A2CConfig:
-    def __init__(self) -> None:
-        self.algo_name = algo_name # name of algorithm
-        self.env_name = env_name # name of environment
-        self.n_envs = 8 # number of asynchronous environments
-        self.gamma = 0.99 # discount factor
-        self.hidden_dim = 256
-        self.lr = 1e-3 # learning rate
-        self.max_frames = 30000
-        self.n_steps = 5
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-class PlotConfig:
-    def __init__(self) -> None:
-        self.algo_name = algo_name # name of algorithm
-        self.env_name = env_name # name of environment
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # check GPU
-        self.result_path = curr_path+"/outputs/" + self.env_name + \
-            '/'+curr_time+'/results/' # path to save results
-        self.model_path = curr_path+"/outputs/" + self.env_name + \
-            '/'+curr_time+'/models/' # path to save models
-        self.save = True # whether to save figures
-
+def get_args():
+    """ Hyperparameters
+    """
+    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time
+    parser = argparse.ArgumentParser(description="hyperparameters")
+    parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm")
+    parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
+    parser.add_argument('--n_envs',default=8,type=int,help="number of environments")
+
+    parser.add_argument('--max_steps',default=20000,type=int,help="max steps of training")
+    parser.add_argument('--n_steps',default=5,type=int,help="steps per policy update")
+    parser.add_argument('--gamma',default=0.99,type=float,help="discount factor")
+    parser.add_argument('--lr',default=1e-3,type=float,help="learning rate")
+    parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/results/' ) # path to save results
+    parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/models/' ) # path to save models
+    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
+    args = parser.parse_args()
+    args.device = torch.device(
+        "cuda" if torch.cuda.is_available() else "cpu") # check GPU
+    return args
 
 def make_envs(env_name):
     def _thunk():
@@ -60,6 +58,7 @@ def test_env(env,model,vis=False):
         if vis: env.render()
         total_reward += reward
     return total_reward
+
 def compute_returns(next_value, rewards, masks, gamma=0.99):
     R = next_value
     returns = []
@@ -70,19 +69,19 @@ def compute_returns(next_value, rewards, masks, gamma=0.99):
 
 
 def train(cfg,envs):
-    print('开始训练!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo}, 设备:{cfg.device}')
+    print('Start training!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
     env = gym.make(cfg.env_name) # a single env
     env.seed(10)
     n_states = envs.observation_space.shape[0]
     n_actions = envs.action_space.n
     model = ActorCritic(n_states, n_actions, cfg.hidden_dim).to(cfg.device)
     optimizer = optim.Adam(model.parameters())
-    frame_idx = 0
+    step_idx = 0
     test_rewards = []
     test_ma_rewards = []
     state = envs.reset()
-    while frame_idx < cfg.max_frames:
+    while step_idx < cfg.max_steps:
         log_probs = []
         values = []
         rewards = []
@@ -101,16 +100,16 @@ def train(cfg,envs):
             rewards.append(torch.FloatTensor(reward).unsqueeze(1).to(cfg.device))
             masks.append(torch.FloatTensor(1 - done).unsqueeze(1).to(cfg.device))
             state = next_state
-            frame_idx += 1
-            if frame_idx % 100 == 0:
+            step_idx += 1
+            if step_idx % 100 == 0:
                 test_reward = np.mean([test_env(env,model) for _ in range(10)])
-                print(f"frame_idx:{frame_idx}, test_reward:{test_reward}")
+                print(f"step_idx:{step_idx}, test_reward:{test_reward}")
                 test_rewards.append(test_reward)
                 if test_ma_rewards:
                     test_ma_rewards.append(0.9*test_ma_rewards[-1]+0.1*test_reward)
                 else:
                     test_ma_rewards.append(test_reward)
-                # plot(frame_idx, test_rewards)
+                # plot(step_idx, test_rewards)
         next_state = torch.FloatTensor(next_state).to(cfg.device)
         _, next_value = model(next_state)
         returns = compute_returns(next_value, rewards, masks)
@@ -124,15 +123,15 @@ def train(cfg,envs):
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-    print('完成训练!')
+    print('Finish training!')
     return test_rewards, test_ma_rewards
 if __name__ == "__main__":
-    cfg = A2CConfig()
-    plot_cfg = PlotConfig()
+    cfg = get_args()
     envs = [make_envs(cfg.env_name) for i in range(cfg.n_envs)]
     envs = SubprocVecEnv(envs)
-    # 训练
+    # training
    rewards,ma_rewards = train(cfg,envs)
-    make_dir(plot_cfg.result_path,plot_cfg.model_path)
-    save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path) # save results
-    plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") # plot results
+    make_dir(cfg.result_path,cfg.model_path)
+    save_args(cfg)
+    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path) # save results
+    plot_rewards(rewards, ma_rewards, cfg, tag="train") # plot results
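The hunk starting at @@ -70,19 +69,19 @@ cuts off after the first two lines of compute_returns; the rest of the function is unchanged by this commit and therefore not shown. For readers following the n-step A2C logic (the rewards, masks and next_value collected in the training loop above), here is a sketch of the standard bootstrapped return such a function computes; the body is reconstructed, not quoted from the repository:

def compute_returns(next_value, rewards, masks, gamma=0.99):
    # next_value: critic estimate V(s_{t+n}) used to bootstrap the final step;
    # rewards/masks: one entry per rollout step, mask = 0 where the episode ended
    R = next_value
    returns = []
    for step in reversed(range(len(rewards))):
        # R_t = r_t + gamma * R_{t+1}, cut at episode boundaries by the mask
        R = rewards[step] + gamma * R * masks[step]
        returns.insert(0, R)
    return returns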

codes/DDPG/README.md

Lines changed: 0 additions & 7 deletions
This file was deleted.
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+------------------ start ------------------
+algo_name : DDPG
+env_name : Pendulum-v1
+train_eps : 300
+test_eps : 20
+gamma : 0.99
+critic_lr : 0.001
+actor_lr : 0.0001
+memory_capacity : 8000
+batch_size : 128
+target_update : 2
+soft_tau : 0.01
+hidden_dim : 256
+result_path : c:\Users\24438\Desktop\rl-tutorials\codes\DDPG/outputs/Pendulum-v1/20220713-225402/results/
+model_path : c:\Users\24438\Desktop\rl-tutorials\codes\DDPG/outputs/Pendulum-v1/20220713-225402/models/
+save_fig : True
+device : cuda
+------------------- end -------------------
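Two of the values logged above, target_update and soft_tau, control how the DDPG target networks track the online networks. The DDPG agent class itself is not touched by this commit, so the snippet below is only an illustrative sketch of the usual soft (Polyak) update that a soft_tau of 0.01 parameterizes, not the repository's implementation:

def soft_update(target_net, source_net, soft_tau=1e-2):
    # Polyak averaging over PyTorch modules: target <- (1 - tau) * target + tau * source,
    # typically applied every `target_update` learning steps
    for target_param, param in zip(target_net.parameters(), source_net.parameters()):
        target_param.data.copy_(
            target_param.data * (1.0 - soft_tau) + param.data * soft_tau
        )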

codes/DDPG/task0.py

Lines changed: 45 additions & 53 deletions
@@ -5,59 +5,51 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-11 20:58:21
 @LastEditor: John
-LastEditTime: 2022-06-09 19:05:20
+LastEditTime: 2022-07-13 22:53:11
 @Discription:
 @Environment: python 3.7.7
 '''
 import sys,os
-os.environ['KMP_DUPLICATE_LIB_OK']='True'
-curr_path = os.path.dirname(os.path.abspath(__file__)) # absolute path of this file
-parent_path = os.path.dirname(curr_path) # parent directory
-sys.path.append(parent_path) # add path to sys.path
+curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
+parent_path = os.path.dirname(curr_path) # parent path
+sys.path.append(parent_path) # add to system path
 
 import datetime
 import gym
 import torch
+import argparse
 
 from env import NormalizedActions,OUNoise
 from ddpg import DDPG
 from common.utils import save_results,make_dir
-from common.utils import plot_rewards
+from common.utils import plot_rewards,save_args
 
-curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
-class Config:
-    '''Hyperparameters
-    '''
-
-    def __init__(self):
-        ################################## environment hyperparameters ##################################
-        self.algo_name = 'DDPG' # name of algorithm
-        self.env_name = 'Pendulum-v1' # name of environment; in newer gym versions (about 0.21.0 and later) Pendulum-v0 was renamed to Pendulum-v1
-        self.device = torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu") # check GPU
-        self.seed = 10 # random seed; 0 means no seed is set
-        self.train_eps = 300 # number of training episodes
-        self.test_eps = 20 # number of testing episodes
-        ################################################################################
-
-        ################################## algorithm hyperparameters ##################################
-        self.gamma = 0.99 # discount factor
-        self.critic_lr = 1e-3 # learning rate of the critic network
-        self.actor_lr = 1e-4 # learning rate of the actor network
-        self.memory_capacity = 8000 # capacity of the replay buffer
-        self.batch_size = 128 # batch size for mini-batch SGD
-        self.target_update = 2 # update frequency of the target network
-        self.hidden_dim = 256 # dimension of the hidden layers
-        self.soft_tau = 1e-2 # soft update coefficient
-        ################################################################################
-
-        ################################# parameters for saving results ################################
-        self.result_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/results/' # path to save results
-        self.model_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/models/' # path to save models
-        self.save = True # whether to save figures
-        ################################################################################
+def get_args():
+    """ Hyperparameters
+    """
+    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time
+    parser = argparse.ArgumentParser(description="hyperparameters")
+    parser.add_argument('--algo_name',default='DDPG',type=str,help="name of algorithm")
+    parser.add_argument('--env_name',default='Pendulum-v1',type=str,help="name of environment")
+    parser.add_argument('--train_eps',default=300,type=int,help="episodes of training")
+    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--gamma',default=0.99,type=float,help="discount factor")
+    parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic")
+    parser.add_argument('--actor_lr',default=1e-4,type=float,help="learning rate of actor")
+    parser.add_argument('--memory_capacity',default=8000,type=int,help="memory capacity")
+    parser.add_argument('--batch_size',default=128,type=int)
+    parser.add_argument('--target_update',default=2,type=int)
+    parser.add_argument('--soft_tau',default=1e-2,type=float)
+    parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/results/' ) # path to save results
+    parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/models/' ) # path to save models
+    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
+    args = parser.parse_args()
+    args.device = torch.device(
+        "cuda" if torch.cuda.is_available() else "cpu") # check GPU
+    return args
 
 def env_agent_config(cfg,seed=1):
     env = NormalizedActions(gym.make(cfg.env_name)) # wrap the env (normalized actions)
@@ -67,9 +59,9 @@ def env_agent_config(cfg,seed=1):
     agent = DDPG(n_states,n_actions,cfg)
     return env,agent
 def train(cfg, env, agent):
-    print('开始训练!')
-    print(f'环境:{cfg.env_name},算法:{cfg.algo_name},设备:{cfg.device}')
-    ou_noise = OUNoise(env.action_space) # 动作噪声
+    print('Start training!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
+    ou_noise = OUNoise(env.action_space) # noise of action
     rewards = [] # record rewards of all episodes
     ma_rewards = [] # record moving-average rewards of all episodes
     for i_ep in range(cfg.train_eps):
@@ -88,18 +80,18 @@ def train(cfg, env, agent):
             agent.update()
             state = next_state
         if (i_ep+1)%10 == 0:
-            print('回合:{}/{},奖励:{:.2f}'.format(i_ep+1, cfg.train_eps, ep_reward))
+            print(f'Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}')
         rewards.append(ep_reward)
         if ma_rewards:
             ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward)
         else:
             ma_rewards.append(ep_reward)
-    print('完成训练!')
+    print('Finish training!')
     return rewards, ma_rewards
 
 def test(cfg, env, agent):
-    print('开始测试!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
+    print('Start testing!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
     rewards = [] # record rewards of all episodes
     ma_rewards = [] # record moving-average rewards of all episodes
     for i_ep in range(cfg.test_eps):
@@ -113,25 +105,25 @@ def test(cfg, env, agent):
             next_state, reward, done, _ = env.step(action)
             ep_reward += reward
             state = next_state
-        print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward))
         rewards.append(ep_reward)
         if ma_rewards:
             ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward)
         else:
             ma_rewards.append(ep_reward)
-        print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}")
-    print('完成测试!')
+        print(f"Episode:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
+    print('Finish testing!')
     return rewards, ma_rewards
 if __name__ == "__main__":
-    cfg = Config()
-    # 训练
+    cfg = get_args()
+    # training
     env,agent = env_agent_config(cfg,seed=1)
     rewards, ma_rewards = train(cfg, env, agent)
     make_dir(cfg.result_path, cfg.model_path)
+    save_args(cfg)
     agent.save(path=cfg.model_path)
     save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
     plot_rewards(rewards, ma_rewards, cfg, tag="train") # plot results
-    # 测试
+    # testing
     env,agent = env_agent_config(cfg,seed=10)
     agent.load(path=cfg.model_path)
     rewards,ma_rewards = test(cfg,env,agent)
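One caveat about the get_args functions introduced in both scripts: argparse's type=bool (used for --save_fig) does not turn the string 'False' into False, because bool() is truthy for any non-empty string, so --save_fig False still yields True. A common workaround, shown here only as a suggestion and not as part of this commit, is an explicit string-to-bool converter:

import argparse

def str2bool(v):
    # argparse passes the raw command-line string to this converter
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("boolean value expected")

parser = argparse.ArgumentParser()
parser.add_argument('--save_fig', default=True, type=str2bool, help="if save figure or not")
print(parser.parse_args(['--save_fig', 'False']).save_fig)  # prints False, as intended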
