Unity-Technologies · chriselion · Mar 8, 2021 · Mar 4, 2021 · Mar 4, 2021 · Mar 4, 2021
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -22,6 +22,7 @@ and this project adheres to
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
+- Made the error message clearer when observations of different shapes are sent to the trainer. (#5030)
 - Fix an issue where queuing InputEvents overwrote data from previous events in the same frame.
 
 ## [1.8.0-preview] - 2021-02-17

diff --git a/ml-agents-envs/mlagents_envs/rpc_utils.py b/ml-agents-envs/mlagents_envs/rpc_utils.py
@@ -187,6 +187,26 @@ def _process_images_num_channels(image_arrays, expected_channels):
     return img
 
 
+def _check_observations_match_spec(
+    obs_index: int,
+    observation_spec: ObservationSpec,
+    agent_info_list: Collection[AgentInfoProto],
+) -> None:
+    """
+    Check that every agent's observation at obs_index matches the spec's shape.
+    This gives a nicer error than a cryptic numpy error later.
+    """
+    expected_obs_shape = tuple(observation_spec.shape)
+    for agent_info in agent_info_list:
+        agent_obs_shape = tuple(agent_info.observations[obs_index].shape)
+        if expected_obs_shape != agent_obs_shape:
+            raise UnityObservationException(
+                f"Observation at index={obs_index} for agent with "
+                f"id={agent_info.id} didn't match the ObservationSpec. "
+                f"Expected shape {expected_obs_shape} but got {agent_obs_shape}."
+            )
+
+
 @timed
 def _observation_to_np_array(
     obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
@@ -223,16 +243,23 @@ def _observation_to_np_array(
 @timed
 def _process_maybe_compressed_observation(
     obs_index: int,
-    shape: Tuple[int, int, int],
+    observation_spec: ObservationSpec,
     agent_info_list: Collection[AgentInfoProto],
 ) -> np.ndarray:
+    shape = cast(Tuple[int, int, int], observation_spec.shape)
     if len(agent_info_list) == 0:
         return np.zeros((0, shape[0], shape[1], shape[2]), dtype=np.float32)
 
-    batched_visual = [
-        _observation_to_np_array(agent_obs.observations[obs_index], shape)
-        for agent_obs in agent_info_list
-    ]
+    try:
+        batched_visual = [
+            _observation_to_np_array(agent_obs.observations[obs_index], shape)
+            for agent_obs in agent_info_list
+        ]
+    except ValueError:
+        # Try to get a more useful error message
+        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
+        # If that didn't raise anything, raise the original error
+        raise
     return np.array(batched_visual, dtype=np.float32)
 
 
@@ -258,17 +285,25 @@ def _raise_on_nan_and_inf(data: np.array, source: str) -> np.array:
 
 @timed
 def _process_rank_one_or_two_observation(
-    obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
+    obs_index: int,
+    observation_spec: ObservationSpec,
+    agent_info_list: Collection[AgentInfoProto],
 ) -> np.ndarray:
     if len(agent_info_list) == 0:
-        return np.zeros((0,) + shape, dtype=np.float32)
-    np_obs = np.array(
-        [
-            agent_obs.observations[obs_index].float_data.data
-            for agent_obs in agent_info_list
-        ],
-        dtype=np.float32,
-    ).reshape((len(agent_info_list),) + shape)
+        return np.zeros((0,) + observation_spec.shape, dtype=np.float32)
+    try:
+        np_obs = np.array(
+            [
+                agent_obs.observations[obs_index].float_data.data
+                for agent_obs in agent_info_list
+            ],
+            dtype=np.float32,
+        ).reshape((len(agent_info_list),) + observation_spec.shape)
+    except ValueError:
+        # Try to get a more useful error message
+        _check_observations_match_spec(obs_index, observation_spec, agent_info_list)
+        # If that didn't raise anything, raise the original error
+        raise
     _raise_on_nan_and_inf(np_obs, "observations")
     return np_obs
 
@@ -285,29 +320,28 @@ def steps_from_proto(
     ]
     decision_obs_list: List[np.ndarray] = []
     terminal_obs_list: List[np.ndarray] = []
-    for obs_index, observation_specs in enumerate(behavior_spec.observation_specs):
-        is_visual = len(observation_specs.shape) == 3
+    for obs_index, observation_spec in enumerate(behavior_spec.observation_specs):
+        is_visual = len(observation_spec.shape) == 3
         if is_visual:
-            obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
             decision_obs_list.append(
                 _process_maybe_compressed_observation(
-                    obs_index, obs_shape, decision_agent_info_list
+                    obs_index, observation_spec, decision_agent_info_list
                 )
             )
             terminal_obs_list.append(
                 _process_maybe_compressed_observation(
-                    obs_index, obs_shape, terminal_agent_info_list
+                    obs_index, observation_spec, terminal_agent_info_list
                 )
             )
         else:
             decision_obs_list.append(
                 _process_rank_one_or_two_observation(
-                    obs_index, observation_specs.shape, decision_agent_info_list
+                    obs_index, observation_spec, decision_agent_info_list
                 )
             )
             terminal_obs_list.append(
                 _process_rank_one_or_two_observation(
-                    obs_index, observation_specs.shape, terminal_agent_info_list
+                    obs_index, observation_spec, terminal_agent_info_list
                 )
             )
     decision_rewards = np.array(

diff --git a/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py b/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
@@ -265,17 +265,21 @@ def test_process_pixels_gray():
 def test_vector_observation():
     n_agents = 10
     shapes = [(3,), (4,)]
+    obs_specs = create_observation_specs_with_shapes(shapes)
     list_proto = generate_list_agent_proto(n_agents, shapes)
     for obs_index, shape in enumerate(shapes):
-        arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
+        arr = _process_rank_one_or_two_observation(
+            obs_index, obs_specs[obs_index], list_proto
+        )
         assert list(arr.shape) == ([n_agents] + list(shape))
         assert np.allclose(arr, 0.1, atol=0.01)
 
 
 def test_process_visual_observation():
-    in_array_1 = np.random.rand(128, 64, 3)
+    shape = (128, 64, 3)
+    in_array_1 = np.random.rand(*shape)
     proto_obs_1 = generate_compressed_proto_obs(in_array_1)
-    in_array_2 = np.random.rand(128, 64, 3)
+    in_array_2 = np.random.rand(*shape)
     in_array_2_mapping = [0, 1, 2]
     proto_obs_2 = generate_compressed_proto_obs_with_mapping(
         in_array_2, in_array_2_mapping
@@ -286,7 +290,8 @@ def test_process_visual_observation():
     ap2 = AgentInfoProto()
     ap2.observations.extend([proto_obs_2])
     ap_list = [ap1, ap2]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
     assert list(arr.shape) == [2, 128, 64, 3]
     assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
     assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
@@ -308,7 +313,9 @@ def test_process_visual_observation_grayscale():
     ap2 = AgentInfoProto()
     ap2.observations.extend([proto_obs_2])
     ap_list = [ap1, ap2]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
+    shape = (128, 64, 1)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
     assert list(arr.shape) == [2, 128, 64, 1]
     assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
     assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
@@ -325,7 +332,10 @@ def test_process_visual_observation_padded_channels():
     ap1 = AgentInfoProto()
     ap1.observations.extend([proto_obs_1])
     ap_list = [ap1]
-    arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
+    shape = (128, 64, 8)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+
+    arr = _process_maybe_compressed_observation(0, obs_spec, ap_list)
     assert list(arr.shape) == [1, 128, 64, 8]
     assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
 
@@ -336,8 +346,12 @@ def test_process_visual_observation_bad_shape():
     ap1 = AgentInfoProto()
     ap1.observations.extend([proto_obs_1])
     ap_list = [ap1]
+
+    shape = (128, 42, 3)
+    obs_spec = create_observation_specs_with_shapes([shape])[0]
+
     with pytest.raises(UnityObservationException):
-        _process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
+        _process_maybe_compressed_observation(0, obs_spec, ap_list)
 
 
 def test_batched_step_result_from_proto():
@@ -370,6 +384,20 @@ def test_batched_step_result_from_proto():
     assert terminal_steps.obs[1].shape[1] == shapes[1][0]
 
 
+def test_mismatch_observations_raise_in_step_result_from_proto():
+    n_agents = 10
+    shapes = [(3,), (4,)]
+    spec = BehaviorSpec(
+        create_observation_specs_with_shapes(shapes), ActionSpec.create_continuous(3)
+    )
+    ap_list = generate_list_agent_proto(n_agents, shapes)
+    # Hack an observation to be larger; we should get an exception.
+    ap_list[0].observations[0].shape[0] += 1
+    ap_list[0].observations[0].float_data.data.append(0.42)
+    with pytest.raises(UnityObservationException):
+        steps_from_proto(ap_list, spec)
+
+
 def test_action_masking_discrete():
     n_agents = 10
     shapes = [(3,), (4,)]