@@ -1634,26 +1634,26 @@ modules we need.


  0%|          | 0/10000 [00:00<?, ?it/s]
-   8%|8         | 800/10000 [00:00<00:05, 1660.50it/s]
-  16%|#6        | 1600/10000 [00:02<00:17, 477.55it/s]
-  24%|##4       | 2400/10000 [00:03<00:11, 674.49it/s]
-  32%|###2      | 3200/10000 [00:04<00:07, 850.24it/s]
-  40%|####      | 4000/10000 [00:04<00:06, 995.16it/s]
-  48%|####8     | 4800/10000 [00:05<00:04, 1116.81it/s]
-  56%|#####6    | 5600/10000 [00:05<00:03, 1208.77it/s]
- reward: -2.34 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.59/6.17, grad norm= 147.92, loss_value= 295.62, loss_actor= 13.96, target value: -15.91:  56%|#####6    | 5600/10000 [00:06<00:03, 1208.77it/s]
- reward: -2.34 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.59/6.17, grad norm= 147.92, loss_value= 295.62, loss_actor= 13.96, target value: -15.91:  64%|######4   | 6400/10000 [00:07<00:04, 841.61it/s]
- reward: -0.11 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-1.53/5.46, grad norm= 118.19, loss_value= 194.76, loss_actor= 10.63, target value: -10.14:  64%|######4   | 6400/10000 [00:08<00:04, 841.61it/s]
- reward: -0.11 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-1.53/5.46, grad norm= 118.19, loss_value= 194.76, loss_actor= 10.63, target value: -10.14:  72%|#######2  | 7200/10000 [00:08<00:03, 700.19it/s]
- reward: -2.33 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.42/5.58, grad norm= 182.04, loss_value= 220.44, loss_actor= 13.69, target value: -16.09:  72%|#######2  | 7200/10000 [00:09<00:03, 700.19it/s]
- reward: -2.33 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.42/5.58, grad norm= 182.04, loss_value= 220.44, loss_actor= 13.69, target value: -16.09:  80%|########  | 8000/10000 [00:10<00:03, 623.68it/s]
- reward: -4.44 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.44/4.89, grad norm= 111.35, loss_value= 211.11, loss_actor= 15.74, target value: -15.42:  80%|########  | 8000/10000 [00:11<00:03, 623.68it/s]
- reward: -4.44 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.44/4.89, grad norm= 111.35, loss_value= 211.11, loss_actor= 15.74, target value: -15.42:  88%|########8 | 8800/10000 [00:12<00:02, 588.63it/s]
- reward: -4.96 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-2.32/4.85, grad norm= 54.44, loss_value= 165.27, loss_actor= 16.38, target value: -16.11:  88%|########8 | 8800/10000 [00:14<00:02, 588.63it/s]
- reward: -4.96 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-2.32/4.85, grad norm= 54.44, loss_value= 165.27, loss_actor= 16.38, target value: -16.11:  96%|#########6| 9600/10000 [00:15<00:01, 399.47it/s]
- reward: -4.86 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.02/4.89, grad norm= 173.10, loss_value= 234.27, loss_actor= 13.70, target value: -21.43:  96%|#########6| 9600/10000 [00:16<00:01, 399.47it/s]
- reward: -4.86 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.02/4.89, grad norm= 173.10, loss_value= 234.27, loss_actor= 13.70, target value: -21.43: : 10400it [00:18, 364.88it/s]
- reward: -4.93 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.38/3.91, grad norm= 120.25, loss_value= 129.44, loss_actor= 15.23, target value: -23.98: : 10400it [00:18, 364.88it/s]
+   8%|8         | 800/10000 [00:00<00:05, 1683.66it/s]
+  16%|#6        | 1600/10000 [00:02<00:17, 485.23it/s]
+  24%|##4       | 2400/10000 [00:03<00:11, 687.00it/s]
+  32%|###2      | 3200/10000 [00:04<00:07, 877.15it/s]
+  40%|####      | 4000/10000 [00:04<00:05, 1045.73it/s]
+  48%|####8     | 4800/10000 [00:05<00:04, 1188.29it/s]
+  56%|#####6    | 5600/10000 [00:05<00:03, 1288.59it/s]
+ reward: -2.38 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.90/6.27, grad norm= 184.34, loss_value= 346.66, loss_actor= 15.34, target value: -18.57:  56%|#####6    | 5600/10000 [00:06<00:03, 1288.59it/s]
+ reward: -2.38 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.90/6.27, grad norm= 184.34, loss_value= 346.66, loss_actor= 15.34, target value: -18.57:  64%|######4   | 6400/10000 [00:07<00:04, 876.74it/s]
+ reward: -0.14 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.44/5.85, grad norm= 49.14, loss_value= 239.65, loss_actor= 14.06, target value: -14.04:  64%|######4   | 6400/10000 [00:07<00:04, 876.74it/s]
+ reward: -0.14 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.44/5.85, grad norm= 49.14, loss_value= 239.65, loss_actor= 14.06, target value: -14.04:  72%|#######2  | 7200/10000 [00:08<00:03, 713.38it/s]
+ reward: -2.36 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.26/5.79, grad norm= 128.31, loss_value= 254.09, loss_actor= 12.59, target value: -14.58:  72%|#######2  | 7200/10000 [00:09<00:03, 713.38it/s]
+ reward: -2.36 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-2.26/5.79, grad norm= 128.31, loss_value= 254.09, loss_actor= 12.59, target value: -14.58:  80%|########  | 8000/10000 [00:10<00:03, 627.39it/s]
+ reward: -4.64 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-3.11/5.26, grad norm= 115.52, loss_value= 239.16, loss_actor= 18.21, target value: -19.89:  80%|########  | 8000/10000 [00:11<00:03, 627.39it/s]
+ reward: -4.64 (r0 = -3.75), reward eval: reward: -0.01, reward normalized=-3.11/5.26, grad norm= 115.52, loss_value= 239.16, loss_actor= 18.21, target value: -19.89:  88%|########8 | 8800/10000 [00:11<00:02, 582.30it/s]
+ reward: -4.99 (r0 = -3.75), reward eval: reward: -5.59, reward normalized=-3.11/5.35, grad norm= 68.47, loss_value= 219.28, loss_actor= 19.75, target value: -19.78:  88%|########8 | 8800/10000 [00:14<00:02, 582.30it/s]
+ reward: -4.99 (r0 = -3.75), reward eval: reward: -5.59, reward normalized=-3.11/5.35, grad norm= 68.47, loss_value= 219.28, loss_actor= 19.75, target value: -19.78:  96%|#########6| 9600/10000 [00:15<00:00, 404.39it/s]
+ reward: -5.39 (r0 = -3.75), reward eval: reward: -5.59, reward normalized=-3.21/5.31, grad norm= 204.79, loss_value= 298.20, loss_actor= 19.41, target value: -22.63:  96%|#########6| 9600/10000 [00:15<00:00, 404.39it/s]
+ reward: -5.39 (r0 = -3.75), reward eval: reward: -5.59, reward normalized=-3.21/5.31, grad norm= 204.79, loss_value= 298.20, loss_actor= 19.41, target value: -22.63: : 10400it [00:17, 366.15it/s]
+ reward: -4.68 (r0 = -3.75), reward eval: reward: -5.59, reward normalized=-3.57/4.27, grad norm= 74.57, loss_value= 203.88, loss_actor= 23.34, target value: -24.98: : 10400it [00:18, 366.15it/s]



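The progress lines in the hunk above are ordinary ``tqdm`` output with the latest training metrics folded into the bar description. As a minimal sketch of that pattern (the metric names and values below are illustrative placeholders, not the tutorial's exact code)::

    from tqdm import tqdm

    pbar = tqdm(total=10_000)
    for step in range(0, 10_000, 800):
        pbar.update(800)
        # Placeholder metrics; in the tutorial these would come from the
        # collected rollouts (reward) and the DDPG loss module
        # (loss_value, loss_actor).
        reward, loss_value, loss_actor = -2.38, 346.66, 15.34
        pbar.set_description(
            f"reward: {reward: 4.2f}, loss_value: {loss_value: 4.2f}, "
            f"loss_actor: {loss_actor: 4.2f}"
        )
    pbar.close()

Because the logged rewards and losses are sampled from a fresh training run each time the docs are rebuilt, the diff touches every metric line even though the underlying script is unchanged.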
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:

.. rst-class:: sphx-glr-timing

- **Total running time of the script:** ( 0 minutes 29.005 seconds)
+ **Total running time of the script:** ( 0 minutes 28.797 seconds)


.. _sphx_glr_download_advanced_coding_ddpg.py: