
Commit 28e956f

feat(Agents): add MAD5PG and MADRL agents
1 parent: 162833d

25 files changed, +6242 −59 lines

Agents/MAD4PG/agent.py

Lines changed: 2 additions & 2 deletions
@@ -65,8 +65,8 @@ class MAD3PGConfig:
     critic_optimizers: Optional[List[snt.Optimizer]] = None
     min_replay_size: int = 1000
     max_replay_size: int = 1000000
-    samples_per_insert: Optional[float] = 32.0
-    n_step: int = 5
+    samples_per_insert: Optional[float] = 1.0
+    n_step: int = 1
     sigma: float = 0.3
     clipping: bool = True
     replay_table_name: str = reverb_adders.DEFAULT_PRIORITY_TABLE
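
For context: in acme-style D4PG configs, n_step is the number of environment transitions folded into each n-step return used for the critic's TD target, and samples_per_insert is the target ratio of learner samples to replay insertions enforced by Reverb's rate limiter, so this hunk switches the agent to 1-step targets and a much looser replay ratio. A minimal sketch of reading the new defaults (assuming the repository root is on sys.path; both fields are plain dataclass defaults, so they are visible on the class itself):

# Sketch only: prints the two class-level defaults changed in this commit.
from Agents.MAD4PG.agent import MAD3PGConfig

print(MAD3PGConfig.samples_per_insert)  # 1.0, previously 32.0
print(MAD3PGConfig.n_step)              # 1, previously 5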

Agents/MAD5PG/__init__.py

Whitespace-only changes.

Agents/MAD5PG/actors.py

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
"""Generic actor implementation, using TensorFlow and Sonnet."""

from typing import Optional, List
from acme import adders
from acme import core
from acme import types
from acme.tf import utils as tf2_utils
from acme.tf import variable_utils as tf2_variable_utils
import dm_env
import sonnet as snt
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
from Log.logger import myapp
tfd = tfp.distributions


class FeedForwardActor(core.Actor):
    """A feed-forward actor.

    An actor based on a feed-forward policy which takes non-batched observations
    and outputs non-batched actions. It also allows adding experiences to replay
    and updating the weights from the policy on the learner.
    """

    def __init__(
        self,
        policy_networks: snt.Module,

        edge_number: int,
        edge_action_size: int,

        adder: Optional[adders.Adder] = None,
        variable_client: Optional[tf2_variable_utils.VariableClient] = None,
    ):
        """Initializes the actor.

        Args:
            policy_networks: A module which takes observations and outputs
                actions.
            adder: the adder object which allows adding experiences to a
                dataset/replay buffer.
            variable_client: object which allows copying weights from the learner
                copy of the policy to the actor copy (in case they are separate).
        """

        # Store these for later use.
        self._adder = adder
        self._variable_client = variable_client
        self._policy_networks = policy_networks

        self._edge_number = edge_number
        self._edge_action_size = edge_action_size

    @tf.function(experimental_relax_shapes=True)
    def _policy(
        self,
        observations: types.NestedTensor,
    ) -> types.NestedTensor:
        # For each edge, add a dummy batch dimension and compute the policy,
        # conditioned on that edge's observation.
        # myapp.debug(f"observations: {np.array(observations)}")
        edge_actions = []
        for i in range(self._edge_number):
            # myapp.debug(f"i: {i}")
            edge_observation = observations[i, :]
            # myapp.debug(f"edge_observation: {np.array(edge_observation)}")
            edge_batched_observation = tf2_utils.add_batch_dim(edge_observation)
            # myapp.debug(f"edge_batched_observation: {edge_batched_observation}")
            edge_policy = self._policy_networks(edge_batched_observation)
            edge_action = edge_policy.sample() if isinstance(edge_policy, tfd.Distribution) else edge_policy
            # myapp.debug(f"edge_action: {edge_action}")
            edge_actions.append(edge_action)

        edge_actions = tf.convert_to_tensor(edge_actions, dtype=tf.float64)
        # myapp.debug(f"edge_actions: {edge_actions}")
        action = tf.reshape(edge_actions, [self._edge_number, self._edge_action_size])
        # myapp.debug(f"action: {action}")
        return action

    def select_action(self, observation: types.NestedArray) -> types.NestedArray:
        # Pass the observation through the policy network.
        action = self._policy(
            observations=tf.convert_to_tensor(observation, dtype=tf.float64))
        # Return the actions as an [edge_number, edge_action_size] tensor.
        return action

    def observe_first(self, timestep: dm_env.TimeStep):
        if self._adder:
            self._adder.add_first(timestep)

    def observe(self, action: types.NestedArray, next_timestep: dm_env.TimeStep):
        if self._adder:
            self._adder.add(action, next_timestep)

    def update(self, wait: bool = False):
        if self._variable_client:
            self._variable_client.update(wait)
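
A minimal standalone sketch of driving this actor (assumptions: the repository root is on sys.path, a single shared Sonnet policy module is used as the snt.Module type hint suggests, and the network and shapes below are illustrative stand-ins rather than the repository's make_policy_network):

# Sketch only: stand-in policy network and dummy observation shapes.
import numpy as np
import sonnet as snt
import tensorflow as tf

from Agents.MAD5PG.actors import FeedForwardActor

edge_number, obs_dim, edge_action_size = 3, 10, 4

# One shared module maps a single edge observation to a bounded action vector.
policy_network = snt.Sequential([
    snt.Linear(64), tf.nn.relu,
    snt.Linear(edge_action_size), tf.tanh,
])

# Build the network variables eagerly with a dummy float64 input, so the
# tf.function-wrapped policy does not have to create them during tracing.
policy_network(tf.zeros([1, obs_dim], dtype=tf.float64))

actor = FeedForwardActor(
    policy_networks=policy_network,
    edge_number=edge_number,
    edge_action_size=edge_action_size,
)

# select_action casts the observation to float64 and returns one action row per edge.
observation = np.random.rand(edge_number, obs_dim)
action = actor.select_action(observation)
print(action.shape)  # (edge_number, edge_action_size)

The test file that follows instead builds one policy network per edge with make_policy_network and runs the actor inside EnvironmentLoop against the project's own environment.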

Agents/MAD5PG/actors_test.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
import sys
sys.path.append(r"/home/neardws/Documents/Game-Theoretic-Deep-Reinforcement-Learning/")
from environment_loop import EnvironmentLoop
from absl.testing import absltest
from Agents.MAD4PG import actors
from Environment.environment import make_environment_spec
from Agents.MAD4PG.networks import make_policy_network
from Experiment.make_environment import get_default_environment

class ActorTest(absltest.TestCase):


    def test_feedforward(self):

        time_slots, task_list, vehicle_list, edge_list, distance_matrix, channel_condition_matrix, \
            vehicle_index_within_edges, environment_config, environment = get_default_environment()

        env_spec = make_environment_spec(environment)

        policy_networks = [make_policy_network(env_spec.edge_actions) for _ in range(environment_config.edge_number)]

        actor = actors.FeedForwardActor(
            policy_networks=policy_networks,

            edge_number=environment_config.edge_number,
            edge_action_size=environment_config.action_size,
        )
        loop = EnvironmentLoop(environment, actor)
        loop.run(20)


if __name__ == '__main__':
    absltest.main()
