@@ -1634,26 +1634,26 @@ modules we need.
0%| | 0/10000 [00:00<?, ?it/s]
-  8%|8         | 800/10000 [00:00<00:05, 1680.42it/s]
- 16%|#6        | 1600/10000 [00:03<00:17, 471.71it/s]
- 24%|##4       | 2400/10000 [00:03<00:11, 665.05it/s]
- 32%|###2      | 3200/10000 [00:04<00:07, 856.11it/s]
- 40%|####      | 4000/10000 [00:04<00:05, 1027.08it/s]
- 48%|####8     | 4800/10000 [00:05<00:04, 1166.85it/s]
- 56%|#####6    | 5600/10000 [00:05<00:03, 1277.85it/s]
- reward: -2.85 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.54/6.66, grad norm= 155.46, loss_value= 513.11, loss_actor= 18.16, target value: -14.97:  56%|#####6    | 5600/10000 [00:06<00:03, 1277.85it/s]
- reward: -2.85 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.54/6.66, grad norm= 155.46, loss_value= 513.11, loss_actor= 18.16, target value: -14.97:  64%|######4   | 6400/10000 [00:07<00:04, 849.73it/s]
- reward: -0.17 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.64/5.64, grad norm= 91.66, loss_value= 263.18, loss_actor= 15.17, target value: -16.35:  64%|######4   | 6400/10000 [00:08<00:04, 849.73it/s]
- reward: -0.17 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.64/5.64, grad norm= 91.66, loss_value= 263.18, loss_actor= 15.17, target value: -16.35:  72%|#######2  | 7200/10000 [00:08<00:03, 704.46it/s]
- reward: -2.90 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-3.10/6.28, grad norm= 337.43, loss_value= 382.46, loss_actor= 14.87, target value: -19.26:  72%|#######2  | 7200/10000 [00:09<00:03, 704.46it/s]
- reward: -2.90 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-3.10/6.28, grad norm= 337.43, loss_value= 382.46, loss_actor= 14.87, target value: -19.26:  80%|########  | 8000/10000 [00:10<00:03, 620.21it/s]
- reward: -4.48 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.83/5.17, grad norm= 155.36, loss_value= 266.41, loss_actor= 16.28, target value: -18.26:  80%|########  | 8000/10000 [00:11<00:03, 620.21it/s]
- reward: -4.48 (r0 = -3.05), reward eval: reward: 0.00, reward normalized=-2.83/5.17, grad norm= 155.36, loss_value= 266.41, loss_actor= 16.28, target value: -18.26:  88%|########8 | 8800/10000 [00:12<00:02, 582.56it/s]
- reward: -5.43 (r0 = -3.05), reward eval: reward: -5.34, reward normalized=-2.46/5.01, grad norm= 233.46, loss_value= 207.46, loss_actor= 20.07, target value: -16.09:  88%|########8 | 8800/10000 [00:14<00:02, 582.56it/s]
- reward: -5.43 (r0 = -3.05), reward eval: reward: -5.34, reward normalized=-2.46/5.01, grad norm= 233.46, loss_value= 207.46, loss_actor= 20.07, target value: -16.09:  96%|#########6| 9600/10000 [00:15<00:01, 392.16it/s]
- reward: -5.40 (r0 = -3.05), reward eval: reward: -5.34, reward normalized=-3.72/5.35, grad norm= 273.07, loss_value= 350.02, loss_actor= 21.10, target value: -25.81:  96%|#########6| 9600/10000 [00:16<00:01, 392.16it/s]
- reward: -5.40 (r0 = -3.05), reward eval: reward: -5.34, reward normalized=-3.72/5.35, grad norm= 273.07, loss_value= 350.02, loss_actor= 21.10, target value: -25.81: : 10400it [00:18, 360.77it/s]
- reward: -4.90 (r0 = -3.05), reward eval: reward: -5.34, reward normalized=-4.18/4.47, grad norm= 120.83, loss_value= 254.43, loss_actor= 27.65, target value: -28.47: : 10400it [00:19, 360.77it/s]
+  8%|8         | 800/10000 [00:00<00:05, 1732.40it/s]
+ 16%|#6        | 1600/10000 [00:02<00:17, 488.68it/s]
+ 24%|##4       | 2400/10000 [00:03<00:10, 693.03it/s]
+ 32%|###2      | 3200/10000 [00:04<00:07, 878.39it/s]
+ 40%|####      | 4000/10000 [00:04<00:05, 1035.33it/s]
+ 48%|####8     | 4800/10000 [00:05<00:04, 1177.04it/s]
+ 56%|#####6    | 5600/10000 [00:05<00:03, 1283.39it/s]
+ reward: -2.12 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.19/6.34, grad norm= 169.82, loss_value= 407.42, loss_actor= 15.43, target value: -13.32:  56%|#####6    | 5600/10000 [00:06<00:03, 1283.39it/s]
+ reward: -2.12 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.19/6.34, grad norm= 169.82, loss_value= 407.42, loss_actor= 15.43, target value: -13.32:  64%|######4   | 6400/10000 [00:07<00:04, 877.86it/s]
+ reward: -0.22 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.38/5.77, grad norm= 73.76, loss_value= 250.94, loss_actor= 14.13, target value: -16.05:  64%|######4   | 6400/10000 [00:07<00:04, 877.86it/s]
+ reward: -0.22 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.38/5.77, grad norm= 73.76, loss_value= 250.94, loss_actor= 14.13, target value: -16.05:  72%|#######2  | 7200/10000 [00:08<00:03, 723.38it/s]
+ reward: -3.28 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.41/5.98, grad norm= 135.06, loss_value= 295.38, loss_actor= 13.13, target value: -15.44:  72%|#######2  | 7200/10000 [00:09<00:03, 723.38it/s]
+ reward: -3.28 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.41/5.98, grad norm= 135.06, loss_value= 295.38, loss_actor= 13.13, target value: -15.44:  80%|########  | 8000/10000 [00:10<00:03, 642.40it/s]
+ reward: -4.93 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.81/5.28, grad norm= 257.19, loss_value= 257.16, loss_actor= 14.93, target value: -18.10:  80%|########  | 8000/10000 [00:10<00:03, 642.40it/s]
+ reward: -4.93 (r0 = -2.92), reward eval: reward: -0.01, reward normalized=-2.81/5.28, grad norm= 257.19, loss_value= 257.16, loss_actor= 14.93, target value: -18.10:  88%|########8 | 8800/10000 [00:11<00:02, 598.47it/s]
+ reward: -4.99 (r0 = -2.92), reward eval: reward: -5.87, reward normalized=-3.09/5.10, grad norm= 126.61, loss_value= 224.68, loss_actor= 19.46, target value: -20.96:  88%|########8 | 8800/10000 [00:14<00:02, 598.47it/s]
+ reward: -4.99 (r0 = -2.92), reward eval: reward: -5.87, reward normalized=-3.09/5.10, grad norm= 126.61, loss_value= 224.68, loss_actor= 19.46, target value: -20.96:  96%|#########6| 9600/10000 [00:15<00:00, 404.31it/s]
+ reward: -5.12 (r0 = -2.92), reward eval: reward: -5.87, reward normalized=-2.99/5.42, grad norm= 104.60, loss_value= 239.53, loss_actor= 20.24, target value: -21.99:  96%|#########6| 9600/10000 [00:15<00:00, 404.31it/s]
+ reward: -5.12 (r0 = -2.92), reward eval: reward: -5.87, reward normalized=-2.99/5.42, grad norm= 104.60, loss_value= 239.53, loss_actor= 20.24, target value: -21.99: : 10400it [00:18, 356.24it/s]
+ reward: -4.86 (r0 = -2.92), reward eval: reward: -5.87, reward normalized=-3.77/4.27, grad norm= 89.54, loss_value= 168.68, loss_actor= 23.35, target value: -25.89: : 10400it [00:18, 356.24it/s]
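The updated lines are standard ``tqdm`` progress output with a custom
description refreshed from the training metrics each time the bar advances.
As a minimal runnable sketch of that logging pattern (the stub
``train_step`` and the 800-frame batch size are illustrative assumptions,
not the tutorial's actual collection code)::

    import random

    import tqdm

    def train_step():
        # Stand-in for one optimization step; returns fake metrics so the
        # sketch runs on its own.
        return {"reward": random.uniform(-6.0, 0.0),
                "loss_value": random.uniform(150.0, 520.0)}

    pbar = tqdm.tqdm(total=10_000, ascii=True)  # ascii=True gives the '#'-filled bars
    r0 = None
    for _ in range(13):  # 13 batches of 800 frames, matching the 10400it above
        metrics = train_step()
        if r0 is None:
            r0 = metrics["reward"]  # first recorded reward, echoed as r0 in every line
        pbar.update(800)
        pbar.set_description(
            f"reward: {metrics['reward']:.2f} (r0 = {r0:.2f}), "
            f"loss_value= {metrics['loss_value']:.2f}"
        )
    pbar.close()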
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing
- **Total running time of the script:** ( 0 minutes 29.113 seconds)
+ **Total running time of the script:** ( 0 minutes 28.909 seconds)
.. _sphx_glr_download_advanced_coding_ddpg.py: