@@ -1634,26 +1634,26 @@ modules we need.
0%| | 0/10000 [00:00<?, ?it/s]
- 8%|8 | 800/10000 [00:00<00:05, 1707.64it/s]
- 16%|#6 | 1600/10000 [00:02<00:17, 488.81it/s]
- 24%|##4 | 2400/10000 [00:03<00:10, 699.38it/s]
- 32%|###2 | 3200/10000 [00:03<00:07, 897.49it/s]
- 40%|#### | 4000/10000 [00:04<00:05, 1060.15it/s]
- 48%|####8 | 4800/10000 [00:05<00:04, 1191.03it/s]
- 56%|#####6 | 5600/10000 [00:05<00:03, 1296.38it/s]
- reward: -2.14 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.08/6.19, grad norm= 234.64, loss_value= 349.42, loss_actor= 15.13, target value: -12.18: 56%|#####6 | 5600/10000 [00:06<00:03, 1296.38it/s]
- reward: -2.14 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.08/6.19, grad norm= 234.64, loss_value= 349.42, loss_actor= 15.13, target value: -12.18: 64%|######4 | 6400/10000 [00:07<00:04, 889.15it/s]
- reward: -0.19 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.52/5.60, grad norm= 99.21, loss_value= 254.70, loss_actor= 14.22, target value: -16.14: 64%|######4 | 6400/10000 [00:07<00:04, 889.15it/s]
- reward: -0.19 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.52/5.60, grad norm= 99.21, loss_value= 254.70, loss_actor= 14.22, target value: -16.14: 72%|#######2 | 7200/10000 [00:08<00:03, 722.82it/s]
- reward: -1.72 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.27/5.64, grad norm= 60.57, loss_value= 231.16, loss_actor= 11.37, target value: -13.73: 72%|#######2 | 7200/10000 [00:09<00:03, 722.82it/s]
- reward: -1.72 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.27/5.64, grad norm= 60.57, loss_value= 231.16, loss_actor= 11.37, target value: -13.73: 80%|######## | 8000/10000 [00:10<00:03, 641.42it/s]
- reward: -3.97 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.10/5.46, grad norm= 76.46, loss_value= 310.34, loss_actor= 16.33, target value: -14.06: 80%|######## | 8000/10000 [00:10<00:03, 641.42it/s]
- reward: -3.97 (r0 = -2.78), reward eval: reward: -0.01, reward normalized=-2.10/5.46, grad norm= 76.46, loss_value= 310.34, loss_actor= 16.33, target value: -14.06: 88%|########8 | 8800/10000 [00:11<00:02, 598.58it/s]
- reward: -4.50 (r0 = -2.78), reward eval: reward: -2.28, reward normalized=-2.83/5.53, grad norm= 143.03, loss_value= 314.16, loss_actor= 19.17, target value: -18.72: 88%|########8 | 8800/10000 [00:14<00:02, 598.58it/s]
- reward: -4.50 (r0 = -2.78), reward eval: reward: -2.28, reward normalized=-2.83/5.53, grad norm= 143.03, loss_value= 314.16, loss_actor= 19.17, target value: -18.72: 96%|#########6| 9600/10000 [00:15<00:00, 404.64it/s]
- reward: -4.74 (r0 = -2.78), reward eval: reward: -2.28, reward normalized=-3.13/4.99, grad norm= 236.04, loss_value= 236.55, loss_actor= 14.05, target value: -22.38: 96%|#########6| 9600/10000 [00:15<00:00, 404.64it/s]
- reward: -4.74 (r0 = -2.78), reward eval: reward: -2.28, reward normalized=-3.13/4.99, grad norm= 236.04, loss_value= 236.55, loss_actor= 14.05, target value: -22.38: : 10400it [00:17, 358.82it/s]
- reward: -2.65 (r0 = -2.78), reward eval: reward: -2.28, reward normalized=-2.90/4.16, grad norm= 76.19, loss_value= 143.21, loss_actor= 12.72, target value: -20.87: : 10400it [00:18, 358.82it/s]
+ 8%|8 | 800/10000 [00:00<00:05, 1703.98it/s]
+ 16%|#6 | 1600/10000 [00:02<00:17, 484.88it/s]
+ 24%|##4 | 2400/10000 [00:03<00:10, 699.40it/s]
+ 32%|###2 | 3200/10000 [00:03<00:07, 901.42it/s]
+ 40%|#### | 4000/10000 [00:04<00:05, 1070.69it/s]
+ 48%|####8 | 4800/10000 [00:04<00:04, 1219.46it/s]
+ 56%|#####6 | 5600/10000 [00:05<00:03, 1336.30it/s]
+ reward: -2.08 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.60/6.39, grad norm= 100.00, loss_value= 431.35, loss_actor= 16.48, target value: -15.57: 56%|#####6 | 5600/10000 [00:06<00:03, 1336.30it/s]
+ reward: -2.08 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.60/6.39, grad norm= 100.00, loss_value= 431.35, loss_actor= 16.48, target value: -15.57: 64%|######4 | 6400/10000 [00:06<00:03, 901.23it/s]
+ reward: -0.20 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-3.16/6.03, grad norm= 379.46, loss_value= 354.63, loss_actor= 14.92, target value: -19.87: 64%|######4 | 6400/10000 [00:07<00:03, 901.23it/s]
+ reward: -0.20 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-3.16/6.03, grad norm= 379.46, loss_value= 354.63, loss_actor= 14.92, target value: -19.87: 72%|#######2 | 7200/10000 [00:08<00:03, 740.79it/s]
+ reward: -3.19 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-1.95/6.12, grad norm= 78.80, loss_value= 296.14, loss_actor= 11.28, target value: -11.78: 72%|#######2 | 7200/10000 [00:09<00:03, 740.79it/s]
+ reward: -3.19 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-1.95/6.12, grad norm= 78.80, loss_value= 296.14, loss_actor= 11.28, target value: -11.78: 80%|######## | 8000/10000 [00:10<00:03, 652.77it/s]
+ reward: -4.73 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.86/5.35, grad norm= 79.71, loss_value= 225.52, loss_actor= 19.22, target value: -18.85: 80%|######## | 8000/10000 [00:10<00:03, 652.77it/s]
+ reward: -4.73 (r0 = -3.42), reward eval: reward: 0.00, reward normalized=-2.86/5.35, grad norm= 79.71, loss_value= 225.52, loss_actor= 19.22, target value: -18.85: 88%|########8 | 8800/10000 [00:11<00:01, 609.61it/s]
+ reward: -5.48 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.25/5.19, grad norm= 207.90, loss_value= 237.13, loss_actor= 21.20, target value: -22.06: 88%|########8 | 8800/10000 [00:14<00:01, 609.61it/s]
+ reward: -5.48 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.25/5.19, grad norm= 207.90, loss_value= 237.13, loss_actor= 21.20, target value: -22.06: 96%|#########6| 9600/10000 [00:14<00:00, 406.16it/s]
+ reward: -5.29 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-2.87/4.98, grad norm= 54.30, loss_value= 193.69, loss_actor= 20.55, target value: -20.42: 96%|#########6| 9600/10000 [00:15<00:00, 406.16it/s]
+ reward: -5.29 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-2.87/4.98, grad norm= 54.30, loss_value= 193.69, loss_actor= 20.55, target value: -20.42: : 10400it [00:17, 363.51it/s]
+ reward: -4.67 (r0 = -3.42), reward eval: reward: -5.46, reward normalized=-3.58/4.46, grad norm= 70.11, loss_value= 183.36, loss_actor= 23.07, target value: -25.11: : 10400it [00:18, 363.51it/s]
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 0 minutes 28.713 seconds)
+ **Total running time of the script:** ( 0 minutes 28.464 seconds)
.. _sphx_glr_download_advanced_coding_ddpg.py: