Progress on propagating the deterministic setting to the action model.

cmard · cmard · commit 7e7c3e2ff911 · 2021-10-27T09:54:03.000-04:00
diff --git a/ml-agents/mlagents/trainers/cli_utils.py b/ml-agents/mlagents/trainers/cli_utils.py
@@ -91,6 +91,13 @@ def _create_parser() -> argparse.ArgumentParser:
         "before resuming training. This option is only valid when the models exist, and have the same "
         "behavior names as the current agents in your scene.",
     )
+    argparser.add_argument(
+        "--deterministic",
+        default=False,
+        dest="deterministic",
+        action=DetectDefaultStoreTrue,
+        help="Whether to use the deterministic samples from the data.",
+    )
     argparser.add_argument(
         "--force",
         default=False,
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
@@ -151,6 +151,7 @@ def _check_valid_memory_size(self, attribute, value):
     vis_encode_type: EncoderType = EncoderType.SIMPLE
     memory: Optional[MemorySettings] = None
     goal_conditioning_type: ConditioningType = ConditioningType.HYPER
+    deterministic: bool = parser.get_default("deterministic")
 
 
 @attr.s(auto_attribs=True)
@@ -928,6 +929,7 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
                         key
                     )
                 )
+
         # Override with CLI args
         # Keep deprecated --load working, TODO: remove
         argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
@@ -950,6 +952,13 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
         if isinstance(final_runoptions.behaviors, TrainerSettings.DefaultTrainerDict):
             # configure whether or not we should require all behavior names to be found in the config YAML
             final_runoptions.behaviors.set_config_specified(_require_all_behaviors)
+
+        for behaviour in final_runoptions.behaviors.keys():
+            if not final_runoptions.behaviors[behaviour].network_settings.deterministic:
+                final_runoptions.behaviors[
+                    behaviour
+                ].network_settings.deterministic = argparse_args["deterministic"]
+
         return final_runoptions
 
     @staticmethod
diff --git a/ml-agents/mlagents/trainers/torch/action_model.py b/ml-agents/mlagents/trainers/torch/action_model.py
@@ -32,6 +32,7 @@ def __init__(
         action_spec: ActionSpec,
         conditional_sigma: bool = False,
         tanh_squash: bool = False,
+        deterministic: bool = False,
     ):
         """
         A torch module that represents the action space of a policy. The ActionModel may contain
@@ -66,6 +67,7 @@ def __init__(
         # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
         # export as well.
         self._clip_action_on_export = not tanh_squash
+        self.deterministic = deterministic
 
     def _sample_action(self, dists: DistInstances) -> AgentAction:
         """
diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py
@@ -617,6 +617,7 @@ def __init__(
             action_spec,
             conditional_sigma=conditional_sigma,
             tanh_squash=tanh_squash,
+            deterministic=network_settings.deterministic,
         )
 
     @property

Original file line number	Diff line number	Diff line change
`@@ -617,6 +617,7 @@ def __init__(`
`617`	`617`	`action_spec,`
`618`	`618`	`conditional_sigma=conditional_sigma,`
`619`	`619`	`tanh_squash=tanh_squash,`
	`620`	`+ deterministic=network_settings.deterministic,`
`620`	`621`	`)`
`621`	`622`
`622`	`623`	`@property`