@@ -1634,26 +1634,26 @@ modules we need.
  0%|          | 0/10000 [00:00<?, ?it/s]
-   8%|8         | 800/10000 [00:00<00:05, 1703.98it/s]
-  16%|#6        | 1600/10000 [00:02<00:17, 484.88it/s]
-  24%|##4       | 2400/10000 [00:03<00:10, 699.40it/s]
-  32%|###2      | 3200/10000 [00:03<00:07, 901.42it/s]
-  40%|####      | 4000/10000 [00:04<00:05, 1070.69it/s]
-  48%|####8     | 4800/10000 [00:04<00:04, 1219.46it/s]
-  56%|#####6    | 5600/10000 [00:05<00:03, 1336.30it/s]
- reward: -2.08 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.60/6.39, grad norm= 100.00, loss_value= 431.35, loss_actor= 16.48, target value: -15.57:  56%|#####6    | 5600/10000 [00:06<00:03, 1336.30it/s]
- reward: -2.08 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.60/6.39, grad norm= 100.00, loss_value= 431.35, loss_actor= 16.48, target value: -15.57:  64%|######4   | 6400/10000 [00:06<00:03, 901.23it/s]
- reward: -0.20 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-3.16/6.03, grad norm= 379.46, loss_value= 354.63, loss_actor= 14.92, target value: -19.87:  64%|######4   | 6400/10000 [00:07<00:03, 901.23it/s]
- reward: -0.20 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-3.16/6.03, grad norm= 379.46, loss_value= 354.63, loss_actor= 14.92, target value: -19.87:  72%|#######2  | 7200/10000 [00:08<00:03, 740.79it/s]
- reward: -3.19 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-1.95/6.12, grad norm= 78.80, loss_value= 296.14, loss_actor= 11.28, target value: -11.78:  72%|#######2  | 7200/10000 [00:09<00:03, 740.79it/s]
- reward: -3.19 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-1.95/6.12, grad norm= 78.80, loss_value= 296.14, loss_actor= 11.28, target value: -11.78:  80%|########  | 8000/10000 [00:10<00:03, 652.77it/s]
- reward: -4.73 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.86/5.35, grad norm= 79.71, loss_value= 225.52, loss_actor= 19.22, target value: -18.85:  80%|########  | 8000/10000 [00:10<00:03, 652.77it/s]
- reward: -4.73 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.86/5.35, grad norm= 79.71, loss_value= 225.52, loss_actor= 19.22, target value: -18.85:  88%|########8 | 8800/10000 [00:11<00:01, 609.61it/s]
- reward: -5.48 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.25/5.19, grad norm= 207.90, loss_value= 237.13, loss_actor= 21.20, target value: -22.06:  88%|########8 | 8800/10000 [00:14<00:01, 609.61it/s]
- reward: -5.48 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.25/5.19, grad norm= 207.90, loss_value= 237.13, loss_actor= 21.20, target value: -22.06:  96%|#########6| 9600/10000 [00:14<00:00, 406.16it/s]
- reward: -5.29 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-2.87/4.98, grad norm= 54.30, loss_value= 193.69, loss_actor= 20.55, target value: -20.42:  96%|#########6| 9600/10000 [00:15<00:00, 406.16it/s]
- reward: -5.29 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-2.87/4.98, grad norm= 54.30, loss_value= 193.69, loss_actor= 20.55, target value: -20.42: : 10400it [00:17, 363.51it/s]
- reward: -4.67 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.58/4.46, grad norm= 70.11, loss_value= 183.36, loss_actor= 23.07, target value: -25.11: : 10400it [00:18, 363.51it/s]
+   8%|8         | 800/10000 [00:00<00:05, 1660.50it/s]
+  16%|#6        | 1600/10000 [00:02<00:17, 477.55it/s]
+  24%|##4       | 2400/10000 [00:03<00:11, 674.49it/s]
+  32%|###2      | 3200/10000 [00:04<00:07, 850.24it/s]
+  40%|####      | 4000/10000 [00:04<00:06, 995.16it/s]
+  48%|####8     | 4800/10000 [00:05<00:04, 1116.81it/s]
+  56%|#####6    | 5600/10000 [00:05<00:03, 1208.77it/s]
+ reward: -2.34 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.59/6.17, grad norm= 147.92, loss_value= 295.62, loss_actor= 13.96, target value: -15.91:  56%|#####6    | 5600/10000 [00:06<00:03, 1208.77it/s]
+ reward: -2.34 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.59/6.17, grad norm= 147.92, loss_value= 295.62, loss_actor= 13.96, target value: -15.91:  64%|######4   | 6400/10000 [00:07<00:04, 841.61it/s]
+ reward: -0.11 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-1.53/5.46, grad norm= 118.19, loss_value= 194.76, loss_actor= 10.63, target value: -10.14:  64%|######4   | 6400/10000 [00:08<00:04, 841.61it/s]
+ reward: -0.11 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-1.53/5.46, grad norm= 118.19, loss_value= 194.76, loss_actor= 10.63, target value: -10.14:  72%|#######2  | 7200/10000 [00:08<00:03, 700.19it/s]
+ reward: -2.33 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.42/5.58, grad norm= 182.04, loss_value= 220.44, loss_actor= 13.69, target value: -16.09:  72%|#######2  | 7200/10000 [00:09<00:03, 700.19it/s]
+ reward: -2.33 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.42/5.58, grad norm= 182.04, loss_value= 220.44, loss_actor= 13.69, target value: -16.09:  80%|########  | 8000/10000 [00:10<00:03, 623.68it/s]
+ reward: -4.44 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.44/4.89, grad norm= 111.35, loss_value= 211.11, loss_actor= 15.74, target value: -15.42:  80%|########  | 8000/10000 [00:11<00:03, 623.68it/s]
+ reward: -4.44 (r0 = -2.00), reward eval: reward: -0.00, reward normalized=-2.44/4.89, grad norm= 111.35, loss_value= 211.11, loss_actor= 15.74, target value: -15.42:  88%|########8 | 8800/10000 [00:12<00:02, 588.63it/s]
+ reward: -4.96 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-2.32/4.85, grad norm= 54.44, loss_value= 165.27, loss_actor= 16.38, target value: -16.11:  88%|########8 | 8800/10000 [00:14<00:02, 588.63it/s]
+ reward: -4.96 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-2.32/4.85, grad norm= 54.44, loss_value= 165.27, loss_actor= 16.38, target value: -16.11:  96%|#########6| 9600/10000 [00:15<00:01, 399.47it/s]
+ reward: -4.86 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.02/4.89, grad norm= 173.10, loss_value= 234.27, loss_actor= 13.70, target value: -21.43:  96%|#########6| 9600/10000 [00:16<00:01, 399.47it/s]
+ reward: -4.86 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.02/4.89, grad norm= 173.10, loss_value= 234.27, loss_actor= 13.70, target value: -21.43: : 10400it [00:18, 364.88it/s]
+ reward: -4.93 (r0 = -2.00), reward eval: reward: -5.98, reward normalized=-3.38/3.91, grad norm= 120.25, loss_value= 129.44, loss_actor= 15.23, target value: -23.98: : 10400it [00:18, 364.88it/s]
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 0 minutes 28.464 seconds)
+ **Total running time of the script:** ( 0 minutes 29.005 seconds)
.. _sphx_glr_download_advanced_coding_ddpg.py: