@@ -36,7 +36,7 @@ class DDPGLoss(LossModule):
36
36
delay_actor (bool, optional): whether to separate the target actor networks from the actor networks used for
37
37
data collection. Default is ``False``.
38
38
delay_value (bool, optional): whether to separate the target value networks from the value networks used for
39
- data collection. Default is ``True ``.
39
+ data collection. Default is ``False ``.
40
40
"""
41
41
42
42
default_value_estimator : ValueEstimators = ValueEstimators .TD0
@@ -48,7 +48,7 @@ def __init__(
48
48
* ,
49
49
loss_function : str = "l2" ,
50
50
delay_actor : bool = False ,
51
- delay_value : bool = True ,
51
+ delay_value : bool = False ,
52
52
gamma : float = None ,
53
53
) -> None :
54
54
super ().__init__ ()
@@ -84,7 +84,7 @@ def __init__(
84
84
85
85
self .actor_in_keys = actor_network .in_keys
86
86
87
- self .loss_function = loss_function
87
+ self .loss_function = loss_function
88
88
89
89
if gamma is not None :
90
90
warnings .warn (_GAMMA_LMBDA_DEPREC_WARNING , category = DeprecationWarning )
@@ -173,7 +173,7 @@ def _loss_value(
173
173
174
174
# td_error = pred_val - target_value
175
175
loss_value = distance_loss (
176
- pred_val , target_value , loss_function = self .loss_function
176
+ pred_val , target_value , loss_function = self .loss_function
177
177
)
178
178
179
179
return loss_value , (pred_val - target_value ).pow (2 ), pred_val , target_value
@@ -186,7 +186,7 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams
186
186
if hasattr (self , "gamma" ):
187
187
hp ["gamma" ] = self .gamma
188
188
hp .update (hyperparams )
189
- value_key = "state_action_value"
189
+ value_key = self . state_action_value_key
190
190
if value_type == ValueEstimators .TD1 :
191
191
self ._value_estimator = TD1Estimator (
192
192
value_network = self .actor_critic , value_key = value_key , ** hp
0 commit comments