@@ -1634,26 +1634,26 @@ modules we need.
 
 
   0%|          | 0/10000 [00:00<?, ?it/s]
-  8%|8         | 800/10000 [00:00<00:05, 1635.05it/s]
- 16%|#6        | 1600/10000 [00:02<00:17, 488.67it/s]
- 24%|##4       | 2400/10000 [00:03<00:10, 693.90it/s]
- 32%|###2      | 3200/10000 [00:03<00:07, 898.92it/s]
- 40%|####      | 4000/10000 [00:04<00:05, 1078.60it/s]
- 48%|####8     | 4800/10000 [00:04<00:04, 1235.64it/s]
- 56%|#####6    | 5600/10000 [00:05<00:03, 1360.94it/s]
- reward: -2.67 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.24/6.42, grad norm= 121.21, loss_value= 442.12, loss_actor= 15.34, target value: -14.76:  56%|#####6    | 5600/10000 [00:06<00:03, 1360.94it/s]
- reward: -2.67 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.24/6.42, grad norm= 121.21, loss_value= 442.12, loss_actor= 15.34, target value: -14.76:  64%|######4   | 6400/10000 [00:06<00:03, 932.78it/s]
- reward: -0.15 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.68/5.62, grad norm= 95.21, loss_value= 284.37, loss_actor= 15.45, target value: -17.78:  64%|######4   | 6400/10000 [00:07<00:03, 932.78it/s]
- reward: -0.15 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.68/5.62, grad norm= 95.21, loss_value= 284.37, loss_actor= 15.45, target value: -17.78:  72%|#######2  | 7200/10000 [00:08<00:03, 747.27it/s]
- reward: -3.37 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.59/6.10, grad norm= 74.55, loss_value= 307.46, loss_actor= 14.67, target value: -15.21:  72%|#######2  | 7200/10000 [00:09<00:03, 747.27it/s]
- reward: -3.37 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.59/6.10, grad norm= 74.55, loss_value= 307.46, loss_actor= 14.67, target value: -15.21:  80%|########  | 8000/10000 [00:09<00:03, 657.24it/s]
- reward: -4.37 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.57/4.64, grad norm= 48.72, loss_value= 192.21, loss_actor= 17.71, target value: -17.12:  80%|########  | 8000/10000 [00:10<00:03, 657.24it/s]
- reward: -4.37 (r0 = -3.61), reward eval: reward: -0.01, reward normalized=-2.57/4.64, grad norm= 48.72, loss_value= 192.21, loss_actor= 17.71, target value: -17.12:  88%|########8 | 8800/10000 [00:11<00:01, 608.99it/s]
- reward: -5.41 (r0 = -3.61), reward eval: reward: -5.49, reward normalized=-2.94/5.05, grad norm= 121.34, loss_value= 215.46, loss_actor= 20.00, target value: -19.67:  88%|########8 | 8800/10000 [00:14<00:01, 608.99it/s]
- reward: -5.41 (r0 = -3.61), reward eval: reward: -5.49, reward normalized=-2.94/5.05, grad norm= 121.34, loss_value= 215.46, loss_actor= 20.00, target value: -19.67:  96%|#########6| 9600/10000 [00:14<00:00, 410.18it/s]
- reward: -4.46 (r0 = -3.61), reward eval: reward: -5.49, reward normalized=-3.61/5.30, grad norm= 308.88, loss_value= 336.67, loss_actor= 19.23, target value: -26.05:  96%|#########6| 9600/10000 [00:15<00:00, 410.18it/s]
- reward: -4.46 (r0 = -3.61), reward eval: reward: -5.49, reward normalized=-3.61/5.30, grad norm= 308.88, loss_value= 336.67, loss_actor= 19.23, target value: -26.05: : 10400it [00:17, 363.84it/s]
- reward: -4.49 (r0 = -3.61), reward eval: reward: -5.49, reward normalized=-3.30/3.94, grad norm= 92.94, loss_value= 152.10, loss_actor= 24.58, target value: -23.26: : 10400it [00:18, 363.84it/s]
+  8%|8         | 800/10000 [00:00<00:05, 1780.84it/s]
+ 16%|#6        | 1600/10000 [00:02<00:17, 488.94it/s]
+ 24%|##4       | 2400/10000 [00:03<00:10, 705.84it/s]
+ 32%|###2      | 3200/10000 [00:03<00:07, 907.78it/s]
+ 40%|####      | 4000/10000 [00:04<00:05, 1075.55it/s]
+ 48%|####8     | 4800/10000 [00:04<00:04, 1222.64it/s]
+ 56%|#####6    | 5600/10000 [00:05<00:03, 1338.15it/s]
+ reward: -2.77 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.40/6.62, grad norm= 164.27, loss_value= 396.87, loss_actor= 12.97, target value: -15.44:  56%|#####6    | 5600/10000 [00:06<00:03, 1338.15it/s]
+ reward: -2.77 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.40/6.62, grad norm= 164.27, loss_value= 396.87, loss_actor= 12.97, target value: -15.44:  64%|######4   | 6400/10000 [00:06<00:04, 897.53it/s]
+ reward: -0.15 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.46/5.64, grad norm= 165.72, loss_value= 260.42, loss_actor= 13.35, target value: -15.62:  64%|######4   | 6400/10000 [00:07<00:04, 897.53it/s]
+ reward: -0.15 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.46/5.64, grad norm= 165.72, loss_value= 260.42, loss_actor= 13.35, target value: -15.62:  72%|#######2  | 7200/10000 [00:08<00:03, 718.68it/s]
+ reward: -2.73 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.34/6.00, grad norm= 116.30, loss_value= 268.66, loss_actor= 13.29, target value: -15.24:  72%|#######2  | 7200/10000 [00:09<00:03, 718.68it/s]
+ reward: -2.73 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.34/6.00, grad norm= 116.30, loss_value= 268.66, loss_actor= 13.29, target value: -15.24:  80%|########  | 8000/10000 [00:10<00:03, 631.02it/s]
+ reward: -4.72 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.68/5.33, grad norm= 98.38, loss_value= 215.18, loss_actor= 17.06, target value: -17.23:  80%|########  | 8000/10000 [00:10<00:03, 631.02it/s]
+ reward: -4.72 (r0 = -2.01), reward eval: reward: -0.00, reward normalized=-2.68/5.33, grad norm= 98.38, loss_value= 215.18, loss_actor= 17.06, target value: -17.23:  88%|########8 | 8800/10000 [00:11<00:02, 590.95it/s]
+ reward: -5.42 (r0 = -2.01), reward eval: reward: -5.04, reward normalized=-2.73/5.36, grad norm= 65.22, loss_value= 257.60, loss_actor= 20.45, target value: -18.03:  88%|########8 | 8800/10000 [00:14<00:02, 590.95it/s]
+ reward: -5.42 (r0 = -2.01), reward eval: reward: -5.04, reward normalized=-2.73/5.36, grad norm= 65.22, loss_value= 257.60, loss_actor= 20.45, target value: -18.03:  96%|#########6| 9600/10000 [00:15<00:01, 399.48it/s]
+ reward: -5.40 (r0 = -2.01), reward eval: reward: -5.04, reward normalized=-3.53/5.24, grad norm= 410.03, loss_value= 340.43, loss_actor= 19.12, target value: -24.79:  96%|#########6| 9600/10000 [00:15<00:01, 399.48it/s]
+ reward: -5.40 (r0 = -2.01), reward eval: reward: -5.04, reward normalized=-3.53/5.24, grad norm= 410.03, loss_value= 340.43, loss_actor= 19.12, target value: -24.79: : 10400it [00:17, 365.12it/s]
+ reward: -4.35 (r0 = -2.01), reward eval: reward: -5.04, reward normalized=-3.57/4.59, grad norm= 83.81, loss_value= 232.02, loss_actor= 22.47, target value: -25.23: : 10400it [00:18, 365.12it/s]
 
 
 
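The log lines in the hunk above come from a ``tqdm`` progress bar whose description is refreshed with the latest training metrics after each collected batch. A minimal sketch of that pattern, assuming a tqdm-based training loop (the metric names mirror the log, but the values and the loop body are illustrative placeholders, not the tutorial's verbatim code):

.. code-block:: python

    import tqdm

    total_frames = 10_000
    frames_per_batch = 800

    # Progress bar covering the full frame-collection budget.
    pbar = tqdm.tqdm(total=total_frames)

    for _ in range(total_frames // frames_per_batch):
        # In the real loop a batch is collected and the loss is optimized
        # here; these placeholder values stand in for the resulting metrics.
        reward, r0, loss_value = -2.67, -3.61, 442.12

        pbar.update(frames_per_batch)
        # Write the latest metrics into the bar text, producing lines like
        # "reward: -2.67 (r0 = -3.61), ... :  56%|#####6    | 5600/10000".
        pbar.set_description(
            f"reward: {reward: 4.2f} (r0 = {r0: 4.2f}), "
            f"loss_value= {loss_value: 4.2f}"
        )

    pbar.close()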
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 0 minutes 28.313 seconds)
+   **Total running time of the script:** ( 0 minutes 28.522 seconds)
 
 
 .. _sphx_glr_download_advanced_coding_ddpg.py: