"""Generic actor implementation, using TensorFlow and Sonnet."""

from typing import Optional

from acme import adders
from acme import core
from acme import types
from acme.tf import utils as tf2_utils
from acme.tf import variable_utils as tf2_variable_utils
import dm_env
import numpy as np
import sonnet as snt
import tensorflow as tf
import tensorflow_probability as tfp

from Log.logger import myapp

tfd = tfp.distributions


class FeedForwardActor(core.Actor):
  """A feed-forward actor.

  An actor based on a feed-forward policy which takes non-batched observations
  and outputs non-batched actions. It also allows adding experiences to replay
  and updating the weights from the policy on the learner.
  """

  def __init__(
      self,
      policy_networks: snt.Module,
      edge_number: int,
      edge_action_size: int,
      adder: Optional[adders.Adder] = None,
      variable_client: Optional[tf2_variable_utils.VariableClient] = None,
  ):
    """Initializes the actor.

    Args:
      policy_networks: A module which takes a (batched) edge observation and
        outputs the corresponding edge action, or a distribution over actions.
      edge_number: number of edges; the policy network is applied once per
        edge observation.
      edge_action_size: size of the action vector produced for each edge.
      adder: the adder object which allows adding experiences to a
        dataset/replay buffer.
      variable_client: object which allows copying weights from the learner
        copy of the policy to the actor copy (in case they are separate).
    """
    # Store these for later use.
    self._adder = adder
    self._variable_client = variable_client
    self._policy_networks = policy_networks

    self._edge_number = edge_number
    self._edge_action_size = edge_action_size

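  # NOTE: experimental_relax_shapes=True lets tf.function relax traced input
  # shapes so the graph is reused rather than retraced when observation shapes
  # vary between calls (newer TensorFlow releases spell this reduce_retracing).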
  @tf.function(experimental_relax_shapes=True)
  def _policy(
      self,
      observations: types.NestedTensor,
  ) -> types.NestedTensor:
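    # The observation is treated as a matrix with one row per edge: the same
    # policy network is applied to each row independently, and the per-edge
    # actions are stacked into a single [edge_number, edge_action_size] tensor.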
    # myapp.debug(f"observations: {np.array(observations)}")
    edge_actions = []
    for i in range(self._edge_number):
      # myapp.debug(f"i: {i}")
      edge_observation = observations[i, :]
      # myapp.debug(f"edge_observation: {np.array(edge_observation)}")
      # Add a dummy batch dimension and as a side effect convert numpy to TF.
      edge_batched_observation = tf2_utils.add_batch_dim(edge_observation)
      # myapp.debug(f"edge_batched_observation: {edge_batched_observation}")
      # Compute the policy, conditioned on the edge observation; sample if the
      # network returns a distribution.
      edge_policy = self._policy_networks(edge_batched_observation)
      edge_action = (
          edge_policy.sample()
          if isinstance(edge_policy, tfd.Distribution) else edge_policy)
      # myapp.debug(f"edge_action: {edge_action}")
      edge_actions.append(edge_action)

    # Stack the per-edge actions and drop the dummy batch dimension.
    edge_actions = tf.convert_to_tensor(edge_actions, dtype=tf.float64)
    # myapp.debug(f"edge_actions: {edge_actions}")
    action = tf.reshape(
        edge_actions, [self._edge_number, self._edge_action_size])
    # myapp.debug(f"action: {action}")
    return action

  def select_action(self, observation: types.NestedArray) -> types.NestedArray:
    # Pass the observation through the policy network.
    action = self._policy(
        observations=tf.convert_to_tensor(observation, dtype=tf.float64))
    # Return the [edge_number, edge_action_size] tensor of per-edge actions.
    return action

  def observe_first(self, timestep: dm_env.TimeStep):
    if self._adder:
      self._adder.add_first(timestep)

  def observe(self, action: types.NestedArray, next_timestep: dm_env.TimeStep):
    if self._adder:
      self._adder.add(action, next_timestep)

  def update(self, wait: bool = False):
    if self._variable_client:
      self._variable_client.update(wait)
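

if __name__ == "__main__":
  # Minimal usage sketch (illustrative only, not part of the actor API): the
  # observation size and the two-layer Sonnet policy below are assumptions
  # made purely for this example.
  edge_number, edge_action_size, obs_size = 3, 2, 4

  # A small feed-forward policy applied to one edge observation at a time.
  toy_policy = snt.Sequential([
      snt.Linear(32), tf.nn.relu,
      snt.Linear(edge_action_size), tf.tanh,
  ])

  actor = FeedForwardActor(
      policy_networks=toy_policy,
      edge_number=edge_number,
      edge_action_size=edge_action_size,
  )

  # One observation row per edge; np.random.rand yields float64, which matches
  # the float64 cast inside select_action.
  observation = np.random.rand(edge_number, obs_size)
  action = actor.select_action(observation)
  print("sampled action shape:", action.shape)  # [edge_number, edge_action_size]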