Skip to content

[MLA-1809] catch mismatched observation sizes #5030

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ and this project adheres to
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
- Made the error message when observations of different shapes are sent to the trainer clearer. (#5030)
- Fixed an issue where queuing InputEvents overwrote data from previous events in the same frame.

## [1.8.0-preview] - 2021-02-17
Expand Down
76 changes: 55 additions & 21 deletions ml-agents-envs/mlagents_envs/rpc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,26 @@ def _process_images_num_channels(image_arrays, expected_channels):
return img


def _check_observations_match_spec(
    obs_index: int,
    observation_spec: ObservationSpec,
    agent_info_list: Collection[AgentInfoProto],
) -> None:
    """
    Verify that each agent's observation at obs_index has the shape declared
    by observation_spec.

    Raises a UnityObservationException for the first agent whose observation
    shape differs, which is easier to read than a numpy shape error later on.
    Returns None when every observation matches.
    """
    spec_shape = tuple(observation_spec.shape)
    for info in agent_info_list:
        actual_shape = tuple(info.observations[obs_index].shape)
        if actual_shape == spec_shape:
            continue
        # First mismatch wins: report the offending agent and both shapes.
        raise UnityObservationException(
            f"Observation at index={obs_index} for agent with "
            f"id={info.id} didn't match the ObservationSpec. "
            f"Expected shape {spec_shape} but got {actual_shape}."
        )


@timed
def _observation_to_np_array(
obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
Expand Down Expand Up @@ -223,16 +243,23 @@ def _observation_to_np_array(
@timed
def _process_maybe_compressed_observation(
obs_index: int,
shape: Tuple[int, int, int],
observation_spec: ObservationSpec,
agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
shape = cast(Tuple[int, int, int], observation_spec.shape)
if len(agent_info_list) == 0:
return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)

batched_visual = [
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
try:
batched_visual = [
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
except ValueError:
# Try to get a more useful error message
_check_observations_match_spec(obs_index, observation_spec, agent_info_list)
# If that didn't raise anything, raise the original error
raise
return np.array(batched_visual, dtype=np.float32)


Expand All @@ -258,17 +285,25 @@ def _raise_on_nan_and_inf(data: np.array, source: str) -> np.array:

@timed
def _process_rank_one_or_two_observation(
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
obs_index: int,
observation_spec: ObservationSpec,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is better now.

agent_info_list: Collection[AgentInfoProto],
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0,) + shape, dtype=np.float32)
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data
for agent_obs in agent_info_list
],
dtype=np.float32,
).reshape((len(agent_info_list),) + shape)
return np.zeros((0,) + observation_spec.shape, dtype=np.float32)
try:
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data
for agent_obs in agent_info_list
],
dtype=np.float32,
).reshape((len(agent_info_list),) + observation_spec.shape)
except ValueError:
# Try to get a more useful error message
_check_observations_match_spec(obs_index, observation_spec, agent_info_list)
# If that didn't raise anything, raise the original error
raise
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs

Expand All @@ -285,29 +320,28 @@ def steps_from_proto(
]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, observation_specs in enumerate(behavior_spec.observation_specs):
is_visual = len(observation_specs.shape) == 3
for obs_index, observation_spec in enumerate(behavior_spec.observation_specs):
is_visual = len(observation_spec.shape) == 3
if is_visual:
obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
decision_obs_list.append(
_process_maybe_compressed_observation(
obs_index, obs_shape, decision_agent_info_list
obs_index, observation_spec, decision_agent_info_list
)
)
terminal_obs_list.append(
_process_maybe_compressed_observation(
obs_index, obs_shape, terminal_agent_info_list
obs_index, observation_spec, terminal_agent_info_list
)
)
else:
decision_obs_list.append(
_process_rank_one_or_two_observation(
obs_index, observation_specs.shape, decision_agent_info_list
obs_index, observation_spec, decision_agent_info_list
)
)
terminal_obs_list.append(
_process_rank_one_or_two_observation(
obs_index, observation_specs.shape, terminal_agent_info_list
obs_index, observation_spec, terminal_agent_info_list
)
)
decision_rewards = np.array(
Expand Down
42 changes: 35 additions & 7 deletions ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,17 +265,21 @@ def test_process_pixels_gray():
def test_vector_observation():
n_agents = 10
shapes = [(3,), (4,)]
obs_specs = create_observation_specs_with_shapes(shapes)
list_proto = generate_list_agent_proto(n_agents, shapes)
for obs_index, shape in enumerate(shapes):
arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
arr = _process_rank_one_or_two_observation(
obs_index, obs_specs[obs_index], list_proto
)
assert list(arr.shape) == ([n_agents] + list(shape))
assert np.allclose(arr, 0.1, atol=0.01)


def test_process_visual_observation():
in_array_1 = np.random.rand(128, 64, 3)
shape = (128, 64, 3)
in_array_1 = np.random.rand(*shape)
proto_obs_1 = generate_compressed_proto_obs(in_array_1)
in_array_2 = np.random.rand(128, 64, 3)
in_array_2 = np.random.rand(*shape)
in_array_2_mapping = [0, 1, 2]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping
Expand All @@ -286,7 +290,8 @@ def test_process_visual_observation():
ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
obs_spec = create_observation_specs_with_shapes([shape])[0]
arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
Expand All @@ -308,7 +313,9 @@ def test_process_visual_observation_grayscale():
ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
shape = (128, 64, 1)
obs_spec = create_observation_specs_with_shapes([shape])[0]
arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
Expand All @@ -325,7 +332,10 @@ def test_process_visual_observation_padded_channels():
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
shape = (128, 64, 8)
obs_spec = create_observation_specs_with_shapes([shape])[0]

arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

Expand All @@ -336,8 +346,12 @@ def test_process_visual_observation_bad_shape():
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]

shape = (128, 42, 3)
obs_spec = create_observation_specs_with_shapes([shape])[0]

with pytest.raises(UnityObservationException):
_process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
_process_maybe_compressed_observation(0, obs_spec, ap_list)


def test_batched_step_result_from_proto():
Expand Down Expand Up @@ -370,6 +384,20 @@ def test_batched_step_result_from_proto():
assert terminal_steps.obs[1].shape[1] == shapes[1][0]


def test_mismatch_observations_raise_in_step_result_from_proto():
    """steps_from_proto should raise when one agent's observation is larger than the spec."""
    shapes = [(3,), (4,)]
    n_agents = 10
    behavior_spec = BehaviorSpec(
        create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
    )
    agent_protos = generate_list_agent_proto(n_agents, shapes)

    # Hack the first agent's first observation to be one element larger,
    # keeping its declared shape and its data in sync; we should get an exception.
    tampered_obs = agent_protos[0].observations[0]
    tampered_obs.shape[0] += 1
    tampered_obs.float_data.data.append(0.42)

    with pytest.raises(UnityObservationException):
        steps_from_proto(agent_protos, behavior_spec)


def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
Expand Down