@@ -711,17 +711,18 @@ def record(self, episode, epsilon, step):
711
711
f"{ datetime .datetime .now ().strftime ('%Y-%m-%dT%H:%M:%S' ):>20} \n "
712
712
)
713
713
714
- for metric in ["ep_rewards" , "ep_lengths" , "ep_avg_losses" , "ep_avg_qs" ]:
715
- plt .plot (getattr (self , f"moving_avg_{ metric } " ))
716
- plt .savefig (getattr (self , f"{ metric } _plot" ))
714
+ for metric in ["ep_lengths" , "ep_avg_losses" , "ep_avg_qs" , "ep_rewards" ]:
717
715
plt .clf ()
716
+ plt .plot (getattr (self , f"moving_avg_{ metric } " ), label = f"moving_avg_{ metric } " )
717
+ plt .legend ()
718
+ plt .savefig (getattr (self , f"{ metric } _plot" ))
718
719
719
720
720
721
######################################################################
721
722
# Let’s play!
722
723
# """""""""""""""
723
724
#
724
- # In this example we run the training loop for 10 episodes, but for Mario to truly learn the ways of
725
+ # In this example we run the training loop for 40 episodes, but for Mario to truly learn the ways of
725
726
# his world, we suggest running the loop for at least 40,000 episodes!
726
727
#
727
728
use_cuda = torch .cuda .is_available ()
@@ -735,7 +736,7 @@ def record(self, episode, epsilon, step):
735
736
736
737
logger = MetricLogger (save_dir )
737
738
738
- episodes = 10
739
+ episodes = 40
739
740
for e in range (episodes ):
740
741
741
742
state = env .reset ()
0 commit comments