@@ -1634,26 +1634,26 @@ modules we need.
  0%|          | 0/10000 [00:00<?, ?it/s]
-  8%|8         | 800/10000 [00:00<00:05, 1722.07it/s]
- 16%|#6        | 1600/10000 [00:02<00:17, 481.23it/s]
- 24%|##4       | 2400/10000 [00:03<00:10, 692.11it/s]
- 32%|###2      | 3200/10000 [00:04<00:07, 889.74it/s]
- 40%|####      | 4000/10000 [00:04<00:05, 1050.97it/s]
- 48%|####8     | 4800/10000 [00:05<00:04, 1176.81it/s]
- 56%|#####6    | 5600/10000 [00:05<00:03, 1298.20it/s]
- reward: -2.92 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.66/6.18 , grad norm= 64.98 , loss_value= 354.58 , loss_actor= 15.12 , target value: -16.19 :  56%|#####6    | 5600/10000 [00:06<00:03, 1298.20it/s]
- reward: -2.92 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.66/6.18 , grad norm= 64.98 , loss_value= 354.58 , loss_actor= 15.12 , target value: -16.19 :  64%|######4   | 6400/10000 [00:06<00:03, 906.92it/s]
- reward: -0.16 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.09/5.62 , grad norm= 124.82 , loss_value= 219.50 , loss_actor= 14.16 , target value: -13.41 :  64%|######4   | 6400/10000 [00:07<00:03, 906.92it/s]
- reward: -0.16 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.09/5.62 , grad norm= 124.82 , loss_value= 219.50 , loss_actor= 14.16 , target value: -13.41 :  72%|#######2  | 7200/10000 [00:08<00:03, 750.54it/s]
- reward: -2.73 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.40/5.86 , grad norm= 125.51 , loss_value= 239.26 , loss_actor= 12.55 , target value: -15.01 :  72%|#######2  | 7200/10000 [00:09<00:03, 750.54it/s]
- reward: -2.73 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.40/5.86 , grad norm= 125.51 , loss_value= 239.26 , loss_actor= 12.55 , target value: -15.01 :  80%|########  | 8000/10000 [00:09<00:02, 671.44it/s]
- reward: -4.45 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.48/5.26 , grad norm= 133.72 , loss_value= 212.55 , loss_actor= 17.56 , target value: -16.44 :  80%|########  | 8000/10000 [00:10<00:02, 671.44it/s]
- reward: -4.45 (r0 = -2.79 ), reward eval: reward: 0.00 , reward normalized=-2.48/5.26 , grad norm= 133.72 , loss_value= 212.55 , loss_actor= 17.56 , target value: -16.44 :  88%|########8 | 8800/10000 [00:11<00:01, 614.51it/s]
- reward: -5.41 (r0 = -2.79 ), reward eval: reward: -5.53 , reward normalized=-3.08/5.18 , grad norm= 130.58 , loss_value= 202.59 , loss_actor= 19.66 , target value: -20.80 :  88%|########8 | 8800/10000 [00:14<00:01, 614.51it/s]
- reward: -5.41 (r0 = -2.79 ), reward eval: reward: -5.53 , reward normalized=-3.08/5.18 , grad norm= 130.58 , loss_value= 202.59 , loss_actor= 19.66 , target value: -20.80 :  96%|#########6| 9600/10000 [00:14<00:00, 406.37it/s]
- reward: -5.40 (r0 = -2.79 ), reward eval: reward: -5.53 , reward normalized=-3.54/5.03 , grad norm= 329.46 , loss_value= 245.21 , loss_actor= 19.56 , target value: -25.54 :  96%|#########6| 9600/10000 [00:15<00:00, 406.37it/s]
- reward: -5.40 (r0 = -2.79 ), reward eval: reward: -5.53 , reward normalized=-3.54/5.03 , grad norm= 329.46 , loss_value= 245.21 , loss_actor= 19.56 , target value: -25.54 : : 10400it [00:17, 371.25it/s]
- reward: -5.22 (r0 = -2.79 ), reward eval: reward: -5.53 , reward normalized=-4.00/4.30 , grad norm= 107.23 , loss_value= 199.22 , loss_actor= 24.13 , target value: -27.65 : : 10400it [00:18, 371.25it/s]
+  8%|8         | 800/10000 [00:00<00:05, 1695.61it/s]
+ 16%|#6        | 1600/10000 [00:02<00:17, 482.88it/s]
+ 24%|##4       | 2400/10000 [00:03<00:11, 687.66it/s]
+ 32%|###2      | 3200/10000 [00:04<00:07, 889.93it/s]
+ 40%|####      | 4000/10000 [00:04<00:05, 1057.38it/s]
+ 48%|####8     | 4800/10000 [00:05<00:04, 1193.59it/s]
+ 56%|#####6    | 5600/10000 [00:05<00:03, 1290.37it/s]
+ reward: -2.84 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-2.43/6.31 , grad norm= 71.04 , loss_value= 396.71 , loss_actor= 14.53 , target value: -14.87 :  56%|#####6    | 5600/10000 [00:06<00:03, 1290.37it/s]
+ reward: -2.84 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-2.43/6.31 , grad norm= 71.04 , loss_value= 396.71 , loss_actor= 14.53 , target value: -14.87 :  64%|######4   | 6400/10000 [00:07<00:04, 851.50it/s]
+ reward: -0.15 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-1.76/5.98 , grad norm= 118.29 , loss_value= 304.53 , loss_actor= 12.29 , target value: -10.82 :  64%|######4   | 6400/10000 [00:07<00:04, 851.50it/s]
+ reward: -0.15 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-1.76/5.98 , grad norm= 118.29 , loss_value= 304.53 , loss_actor= 12.29 , target value: -10.82 :  72%|#######2  | 7200/10000 [00:08<00:04, 692.07it/s]
+ reward: -2.41 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-1.76/6.10 , grad norm= 100.06 , loss_value= 349.30 , loss_actor= 11.72 , target value: -12.19 :  72%|#######2  | 7200/10000 [00:09<00:04, 692.07it/s]
+ reward: -2.41 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-1.76/6.10 , grad norm= 100.06 , loss_value= 349.30 , loss_actor= 11.72 , target value: -12.19 :  80%|########  | 8000/10000 [00:10<00:03, 623.08it/s]
+ reward: -4.83 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-2.22/4.89 , grad norm= 68.99 , loss_value= 204.73 , loss_actor= 13.44 , target value: -14.30 :  80%|########  | 8000/10000 [00:11<00:03, 623.08it/s]
+ reward: -4.83 (r0 = -3.17 ), reward eval: reward: -0.01 , reward normalized=-2.22/4.89 , grad norm= 68.99 , loss_value= 204.73 , loss_actor= 13.44 , target value: -14.30 :  88%|########8 | 8800/10000 [00:11<00:02, 578.83it/s]
+ reward: -4.41 (r0 = -3.17 ), reward eval: reward: -5.62 , reward normalized=-2.91/4.96 , grad norm= 81.74 , loss_value= 161.88 , loss_actor= 13.33 , target value: -19.36 :  88%|########8 | 8800/10000 [00:14<00:02, 578.83it/s]
+ reward: -4.41 (r0 = -3.17 ), reward eval: reward: -5.62 , reward normalized=-2.91/4.96 , grad norm= 81.74 , loss_value= 161.88 , loss_actor= 13.33 , target value: -19.36 :  96%|#########6| 9600/10000 [00:15<00:01, 389.80it/s]
+ reward: -5.39 (r0 = -3.17 ), reward eval: reward: -5.62 , reward normalized=-3.06/5.23 , grad norm= 193.62 , loss_value= 258.27 , loss_actor= 14.26 , target value: -22.43 :  96%|#########6| 9600/10000 [00:16<00:01, 389.80it/s]
+ reward: -5.39 (r0 = -3.17 ), reward eval: reward: -5.62 , reward normalized=-3.06/5.23 , grad norm= 193.62 , loss_value= 258.27 , loss_actor= 14.26 , target value: -22.43 : : 10400it [00:18, 357.06it/s]
+ reward: -4.73 (r0 = -3.17 ), reward eval: reward: -5.62 , reward normalized=-3.74/4.05 , grad norm= 76.70 , loss_value= 159.92 , loss_actor= 23.20 , target value: -26.13 : : 10400it [00:19, 357.06it/s]
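These progress lines come from a tqdm bar whose description string is refreshed as new training metrics arrive, which is why the later ticks carry the reward and loss readouts. Below is a minimal sketch of that pattern, assuming a 10,000-frame budget collected in 800-frame batches; the variable names and metric values are illustrative, not the tutorial's actual code::

    # Sketch of the logging pattern behind the lines above (an assumption,
    # not the tutorial's exact code): update() advances the frame counter and
    # set_description() prepends the metric string to the percentage readout.
    from tqdm import tqdm

    total_frames = 10_000
    frames_per_batch = 800  # matches the 800-frame ticks in the log

    pbar = tqdm(total=total_frames)
    for _ in range(total_frames // frames_per_batch):
        # ... collect a batch and run the optimization steps here ...
        pbar.update(frames_per_batch)
        # Illustrative values; the real ones come from the collector and losses.
        reward, loss_value, loss_actor = -2.84, 396.71, 14.53
        pbar.set_description(
            f"reward: {reward: 4.2f}, "
            f"loss_value: {loss_value: 4.2f}, "
            f"loss_actor: {loss_actor: 4.2f}"
        )
    pbar.close()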
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 0 minutes 28.302 seconds)
+ **Total running time of the script:** ( 0 minutes 29.164 seconds)
.. _sphx_glr_download_advanced_coding_ddpg.py: