@@ -1634,26 +1634,26 @@ modules we need.
  0%|          | 0/10000 [00:00<?, ?it/s]
-   8%|8         | 800/10000 [00:00<00:05, 1706.86it/s]
-  16%|#6        | 1600/10000 [00:02<00:16, 494.56it/s]
-  24%|##4       | 2400/10000 [00:03<00:10, 714.14it/s]
-  32%|###2      | 3200/10000 [00:03<00:07, 927.36it/s]
-  40%|####      | 4000/10000 [00:04<00:05, 1108.64it/s]
-  48%|####8     | 4800/10000 [00:04<00:04, 1263.34it/s]
-  56%|#####6    | 5600/10000 [00:05<00:03, 1386.67it/s]
- reward: -2.53 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.87/6.26, grad norm= 172.44, loss_value= 372.48, loss_actor= 13.77, target value: -17.13 : 56%|#####6    | 5600/10000 [00:06<00:03, 1386.67it/s]
- reward: -2.53 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.87/6.26, grad norm= 172.44, loss_value= 372.48, loss_actor= 13.77, target value: -17.13 : 64%|######4   | 6400/10000 [00:06<00:03, 914.00it/s]
- reward: -0.10 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-1.77/5.75, grad norm= 83.74, loss_value= 243.26, loss_actor= 12.18, target value: -11.92 : 64%|######4   | 6400/10000 [00:07<00:03, 914.00it/s]
- reward: -0.10 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-1.77/5.75, grad norm= 83.74, loss_value= 243.26, loss_actor= 12.18, target value: -11.92 : 72%|#######2  | 7200/10000 [00:08<00:03, 732.98it/s]
- reward: -3.06 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.10/6.37, grad norm= 175.25, loss_value= 410.52, loss_actor= 14.70, target value: -14.73 : 72%|#######2  | 7200/10000 [00:09<00:03, 732.98it/s]
- reward: -3.06 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.10/6.37, grad norm= 175.25, loss_value= 410.52, loss_actor= 14.70, target value: -14.73 : 80%|########  | 8000/10000 [00:09<00:03, 648.25it/s]
- reward: -5.12 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.76/5.11, grad norm= 204.29, loss_value= 256.34, loss_actor= 13.80, target value: -18.21 : 80%|########  | 8000/10000 [00:10<00:03, 648.25it/s]
- reward: -5.12 (r0 = -2.38), reward eval: reward: 0.00, reward normalized=-2.76/5.11, grad norm= 204.29, loss_value= 256.34, loss_actor= 13.80, target value: -18.21 : 88%|########8 | 8800/10000 [00:11<00:01, 603.43it/s]
- reward: -4.34 (r0 = -2.38), reward eval: reward: -3.72, reward normalized=-2.41/4.96, grad norm= 83.94, loss_value= 184.70, loss_actor= 12.14, target value: -16.55 : 88%|########8 | 8800/10000 [00:14<00:01, 603.43it/s]
- reward: -4.34 (r0 = -2.38), reward eval: reward: -3.72, reward normalized=-2.41/4.96, grad norm= 83.94, loss_value= 184.70, loss_actor= 12.14, target value: -16.55 : 96%|#########6| 9600/10000 [00:14<00:00, 408.81it/s]
- reward: -12.30 (r0 = -2.38), reward eval: reward: -3.72, reward normalized=-3.23/6.55, grad norm= 181.45, loss_value= 321.95, loss_actor= 15.06, target value: -22.91 : 96%|#########6| 9600/10000 [00:15<00:00, 408.81it/s]
- reward: -12.30 (r0 = -2.38), reward eval: reward: -3.72, reward normalized=-3.23/6.55, grad norm= 181.45, loss_value= 321.95, loss_actor= 15.06, target value: -22.91 : : 10400it [00:17, 371.73it/s]
- reward: -3.25 (r0 = -2.38), reward eval: reward: -3.72, reward normalized=-4.02/6.12, grad norm= 131.26, loss_value= 237.20, loss_actor= 23.33, target value: -27.38 : : 10400it [00:18, 371.73it/s]
+   8%|8         | 800/10000 [00:00<00:05, 1686.02it/s]
+  16%|#6        | 1600/10000 [00:03<00:17, 474.11it/s]
+  24%|##4       | 2400/10000 [00:03<00:11, 671.52it/s]
+  32%|###2      | 3200/10000 [00:04<00:07, 855.29it/s]
+  40%|####      | 4000/10000 [00:04<00:05, 1017.39it/s]
+  48%|####8     | 4800/10000 [00:05<00:04, 1163.51it/s]
+  56%|#####6    | 5600/10000 [00:05<00:03, 1272.13it/s]
+ reward: -2.71 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.89/6.36, grad norm= 139.40, loss_value= 452.21, loss_actor= 15.36, target value: -17.24 : 56%|#####6    | 5600/10000 [00:06<00:03, 1272.13it/s]
+ reward: -2.71 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.89/6.36, grad norm= 139.40, loss_value= 452.21, loss_actor= 15.36, target value: -17.24 : 64%|######4   | 6400/10000 [00:07<00:04, 870.02it/s]
+ reward: -0.14 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.44/5.99, grad norm= 161.20, loss_value= 324.10, loss_actor= 12.66, target value: -15.55 : 64%|######4   | 6400/10000 [00:07<00:04, 870.02it/s]
+ reward: -0.14 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.44/5.99, grad norm= 161.20, loss_value= 324.10, loss_actor= 12.66, target value: -15.55 : 72%|#######2  | 7200/10000 [00:08<00:03, 703.58it/s]
+ reward: -2.45 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.42/5.98, grad norm= 130.59, loss_value= 312.04, loss_actor= 12.01, target value: -15.33 : 72%|#######2  | 7200/10000 [00:09<00:03, 703.58it/s]
+ reward: -2.45 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.42/5.98, grad norm= 130.59, loss_value= 312.04, loss_actor= 12.01, target value: -15.33 : 80%|########  | 8000/10000 [00:10<00:03, 624.50it/s]
+ reward: -4.12 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.73/5.00, grad norm= 205.76, loss_value= 217.68, loss_actor= 16.33, target value: -18.26 : 80%|########  | 8000/10000 [00:11<00:03, 624.50it/s]
+ reward: -4.12 (r0 = -3.45), reward eval: reward: 0.01, reward normalized=-2.73/5.00, grad norm= 205.76, loss_value= 217.68, loss_actor= 16.33, target value: -18.26 : 88%|########8 | 8800/10000 [00:12<00:02, 581.06it/s]
+ reward: -5.10 (r0 = -3.45), reward eval: reward: -7.09, reward normalized=-2.55/5.13, grad norm= 123.25, loss_value= 197.78, loss_actor= 19.07, target value: -17.02 : 88%|########8 | 8800/10000 [00:14<00:02, 581.06it/s]
+ reward: -5.10 (r0 = -3.45), reward eval: reward: -7.09, reward normalized=-2.55/5.13, grad norm= 123.25, loss_value= 197.78, loss_actor= 19.07, target value: -17.02 : 96%|#########6| 9600/10000 [00:15<00:01, 391.08it/s]
+ reward: -5.12 (r0 = -3.45), reward eval: reward: -7.09, reward normalized=-3.09/5.14, grad norm= 269.24, loss_value= 261.51, loss_actor= 18.04, target value: -22.19 : 96%|#########6| 9600/10000 [00:16<00:01, 391.08it/s]
+ reward: -5.12 (r0 = -3.45), reward eval: reward: -7.09, reward normalized=-3.09/5.14, grad norm= 269.24, loss_value= 261.51, loss_actor= 18.04, target value: -22.19 : : 10400it [00:18, 354.47it/s]
+ reward: -5.27 (r0 = -3.45), reward eval: reward: -7.09, reward normalized=-3.08/5.03, grad norm= 66.19, loss_value= 194.93, loss_actor= 20.81, target value: -22.46 : : 10400it [00:19, 354.47it/s]
@@ -1723,7 +1723,7 @@ To iterate further on this loss module we might consider:
.. rst-class:: sphx-glr-timing

- **Total running time of the script:** ( 0 minutes 28.061 seconds)
+ **Total running time of the script:** ( 0 minutes 29.218 seconds)

.. _sphx_glr_download_advanced_coding_ddpg.py: