@@ -58,6 +58,13 @@ def main():
     viz = Visdom(port=args.port)
     win = None
 
+    tensorboard_writer = None
+    if args.tensorboard_logdir is not None:
+        from tensorboardX import SummaryWriter
+        import time, os, datetime
+        ts_str = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S')
+        tensorboard_writer = SummaryWriter(log_dir=os.path.join(args.tensorboard_logdir, ts_str))
+
     envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, args.add_timestep, device, False)
 
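For this hunk to run, a --tensorboard-logdir argument has to exist on the parser. A minimal sketch of how it could be registered on the existing argparse parser (the flag spelling and help text are assumptions, not shown in this diff):

    # Assumed registration for the value read above as args.tensorboard_logdir.
    # Defaulting to None keeps TensorBoard logging opt-in.
    parser.add_argument(
        '--tensorboard-logdir',
        default=None,
        help='directory for TensorBoard event files (disabled when omitted)')

Timestamping the subdirectory gives every invocation its own log dir, so repeated runs show up as separate curves in TensorBoard instead of overwriting each other.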
@@ -150,9 +157,19 @@ def main():
                        np.mean(episode_rewards),
                        np.median(episode_rewards),
                        np.min(episode_rewards),
-                       np.max(episode_rewards), dist_entropy,
+                       np.max(episode_rewards),
+                       dist_entropy,
                        value_loss, action_loss))
 
+            if tensorboard_writer is not None:
+                tensorboard_writer.add_scalar("mean reward", np.mean(episode_rewards), total_num_steps)
+                tensorboard_writer.add_scalar("median reward", np.median(episode_rewards), total_num_steps)
+                tensorboard_writer.add_scalar("min reward", np.min(episode_rewards), total_num_steps)
+                tensorboard_writer.add_scalar("max reward", np.max(episode_rewards), total_num_steps)
+                tensorboard_writer.add_scalar("dist entropy", dist_entropy, total_num_steps)
+                tensorboard_writer.add_scalar("value loss", value_loss, total_num_steps)
+                tensorboard_writer.add_scalar("action loss", action_loss, total_num_steps)
+
         if (args.eval_interval is not None
                 and len(episode_rewards) > 1
                 and j % args.eval_interval == 0):
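tensorboardX's SummaryWriter buffers events, so the writer should be closed before the process exits to flush the last scalars. A minimal sketch of the matching cleanup, assuming it sits at the end of main():

    # Assumed cleanup at the end of main(); close() flushes buffered events.
    if tensorboard_writer is not None:
        tensorboard_writer.close()

With events on disk, running tensorboard --logdir against the chosen tensorboard_logdir plots the reward, entropy, and loss scalars logged above.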