@@ -47,7 +47,7 @@ def act(self, state):
     def update(self, state, action, reward, next_state, done):
         '''Update per timestep after env transitions, e.g. memory, algorithm, update agent params, train net'''
         self.body.update(state, action, reward, next_state, done)
-        if util.in_eval_lab_modes():  # eval does not update agent for training
+        if util.in_eval_lab_mode():  # eval does not update agent for training
             return
         self.body.memory.update(state, action, reward, next_state, done)
         loss = self.algorithm.train()
@@ -59,7 +59,7 @@ def update(self, state, action, reward, next_state, done):
     @lab_api
     def save(self, ckpt=None):
         '''Save agent'''
-        if util.in_eval_lab_modes():  # eval does not save new models
+        if util.in_eval_lab_mode():  # eval does not save new models
             return
         self.algorithm.save(ckpt=ckpt)

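Both guards follow the same pattern: when the lab runs in an evaluation mode, `update` and `save` return early so the agent's parameters and checkpoints stay untouched while it is being evaluated. As a purely illustrative sketch (not SLM-Lab's actual `util` module; the variable names and mode values below are assumptions), such a mode check can be as simple as comparing a process-wide mode string against the evaluation modes:

    # Hypothetical sketch of a mode-check helper; not SLM-Lab's real implementation.
    EVAL_MODES = ('eval', 'enjoy')  # assumed evaluation-only lab modes
    lab_mode = 'train'  # assumed to be set once at process startup

    def in_eval_lab_mode():
        '''Return True when the process runs in an evaluation-only mode.'''
        return lab_mode in EVAL_MODES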
@@ -103,8 +103,16 @@ def __init__(self, env, spec, aeb=(0, 0, 0)):
         self.train_df = pd.DataFrame(columns=[
             'epi', 't', 'wall_t', 'opt_step', 'frame', 'fps', 'total_reward', 'total_reward_ma', 'loss', 'lr',
             'explore_var', 'entropy_coef', 'entropy', 'grad_norm'])
+
+        # in train@ mode, override from saved train_df if exists
+        if util.in_train_lab_mode() and self.spec['meta']['resume']:
+            train_df_filepath = util.get_session_df_path(self.spec, 'train')
+            if os.path.exists(train_df_filepath):
+                self.train_df = util.read(train_df_filepath)
+                self.env.clock.load(self.train_df)
+
         # track eval data within run_eval. the same as train_df except for reward
-        if ps.get(self.spec, 'meta.rigorous_eval'):
+        if self.spec['meta']['rigorous_eval']:
             self.eval_df = self.train_df.copy()
         else:
             self.eval_df = self.train_df
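The new resume block reloads the previously saved train_df and hands it to the clock so frame and episode counters continue from the checkpoint rather than restarting at zero. The `rigorous_eval` check also switches from a pydash path lookup to plain dict indexing, which assumes the key is always defined in the spec. A small standalone comparison of the two access styles (the spec values here are made up for illustration):

    import pydash as ps

    spec = {'meta': {'rigorous_eval': 0}}  # made-up minimal spec for illustration

    # pydash returns None (or a supplied default) when any part of the path is missing
    print(ps.get(spec, 'meta.rigorous_eval'))  # 0
    print(ps.get(spec, 'meta.missing_key'))    # None

    # direct indexing raises KeyError when the key is absent, so the spec must define it
    print(spec['meta']['rigorous_eval'])       # 0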
@@ -178,6 +186,7 @@ def ckpt(self, env, df_mode):
         df = getattr(self, f'{df_mode}_df')
         df.loc[len(df)] = row  # append efficiently to df
         df.iloc[-1]['total_reward_ma'] = total_reward_ma = df[-viz.PLOT_MA_WINDOW:]['total_reward'].mean()
+        df.drop_duplicates('frame', inplace=True)  # remove any duplicates by the same frame
         self.total_reward_ma = total_reward_ma

     def get_mean_lr(self):
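The added drop_duplicates call guards against logging two rows for the same frame, e.g. when a resumed run re-logs the checkpoint frame. A minimal standalone sketch of the append / trailing-mean / dedup pattern, using a hypothetical window of 100 in place of viz.PLOT_MA_WINDOW:

    import pandas as pd

    PLOT_MA_WINDOW = 100  # stand-in for viz.PLOT_MA_WINDOW

    df = pd.DataFrame(columns=['frame', 'total_reward', 'total_reward_ma'])
    for frame, reward in [(10, 1.0), (20, 2.0), (20, 2.0), (30, 3.0)]:
        df.loc[len(df)] = [frame, reward, None]  # append a new row
        # moving average over the trailing window of logged rows
        df.loc[df.index[-1], 'total_reward_ma'] = df[-PLOT_MA_WINDOW:]['total_reward'].mean()
        df.drop_duplicates('frame', inplace=True)  # keep only the first row per frame

    print(df)  # one row each for frames 10, 20, 30 with their running means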
@@ -192,10 +201,9 @@ def get_mean_lr(self):

     def get_log_prefix(self):
         '''Get the prefix for logging'''
-        spec = self.agent.spec
-        spec_name = spec['name']
-        trial_index = spec['meta']['trial']
-        session_index = spec['meta']['session']
+        spec_name = self.spec['name']
+        trial_index = self.spec['meta']['trial']
+        session_index = self.spec['meta']['session']
         prefix = f'Trial {trial_index} session {session_index} {spec_name}_t{trial_index}_s{session_index}'
         return prefix

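With the spec now read directly off the body instead of through the agent, the prefix depends only on three spec fields. For example, with made-up values for the spec name and indices, the f-string from the diff produces:

    spec_name, trial_index, session_index = 'dqn_cartpole', 0, 1  # made-up example values
    prefix = f'Trial {trial_index} session {session_index} {spec_name}_t{trial_index}_s{session_index}'
    print(prefix)  # Trial 0 session 1 dqn_cartpole_t0_s1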
@@ -232,8 +240,8 @@ def log_tensorboard(self):
         self.tb_actions = []  # store actions for tensorboard
         logger.info(f'Using TensorBoard logging for dev mode. Run `tensorboard --logdir={log_prepath}` to start TensorBoard.')

-        trial_index = self.agent.spec['meta']['trial']
-        session_index = self.agent.spec['meta']['session']
+        trial_index = self.spec['meta']['trial']
+        session_index = self.spec['meta']['session']
         if session_index != 0:  # log only session 0
             return
         idx_suffix = f'trial{trial_index}_session{session_index}'
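Only session 0 writes TensorBoard logs, and the trial/session indices are folded into a suffix that keeps runs apart in the TensorBoard UI. A generic sketch of how such a suffix can be used with torch's SummaryWriter; the log directory and scalar names here are assumptions, not SLM-Lab's actual logging code:

    from torch.utils.tensorboard import SummaryWriter

    trial_index, session_index = 0, 0  # made-up example indices
    idx_suffix = f'trial{trial_index}_session{session_index}'

    # write scalars under a run directory tagged with the suffix (hypothetical path)
    writer = SummaryWriter(log_dir=f'data/tb/{idx_suffix}')
    for frame, loss in [(100, 0.9), (200, 0.5), (300, 0.2)]:
        writer.add_scalar('loss', loss, global_step=frame)
    writer.close()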