We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 45cdbd1 · commit 4a6516a · Copy full SHA for 4a6516a
torchrl/objectives/ppo.py
@@ -48,7 +48,7 @@ class PPOLoss(LossModule):
48
gamma (scalar): a discount factor for return computation.
49
loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1".
50
normalize_advantage (bool): if True, the advantage will be normalized before being used.
51
- Defaults to True.
+ Defaults to False.
52
53
"""
54
@@ -64,7 +64,7 @@ def __init__(
64
critic_coef: float = 1.0,
65
gamma: float = 0.99,
66
loss_critic_type: str = "smooth_l1",
67
- normalize_advantage: bool = True,
+ normalize_advantage: bool = False,
68
):
69
super().__init__()
70
self.convert_to_functional(
0 commit comments