Skip to content

Cherry picking #5283 and #5519 into 2.0.1-verified-patch to fix gRPC for mac m1 (MLA 2259) #5602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .yamato/training-backcompat-tests.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@

test_mac_backcompat_2020.1:
{% capture editor_version %}2020.1{% endcapture %}
test_mac_backcompat_2019.4:
{% capture editor_version %}2019.4{% endcapture %}
{% capture csharp_backcompat_version %}1.0.0{% endcapture %}
# This test has to run on mac because it requires the custom build of tensorflow without AVX
# Test against 2020.1 because 2020.2 has to run against package version 1.2.0
name: Test Mac Backcompat Training {{ editor_version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
image: ml-agents/ml-agents-bokken-mac:0.1.5-853758
flavor: b1.small
variables:
UNITY_VERSION: {{ editor_version }}
commands:
- |
python3 -m venv venv && source venv/bin/activate
python -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
unity-downloader-cli -u {{ editor_version }} -c editor --wait --fast
Expand All @@ -23,9 +23,9 @@ test_mac_backcompat_2020.1:
python -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=mac
python -u -m ml-agents.tests.yamato.training_int_tests --csharp {{ csharp_backcompat_version }}
- |
python3 -m venv venv_old && source venv_old/bin/activate
python -m venv venv_old && source venv_old/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.training_int_tests --python 0.16.0
python -u -m ml-agents.tests.yamato.training_int_tests --python 0.24.0
triggers:
cancel_old_ci: true
recurring:
Expand Down
Binary file not shown.
31 changes: 24 additions & 7 deletions com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define MLA_SUPPORTED_TRAINING_PLATFORM
#endif

# if MLA_SUPPORTED_TRAINING_PLATFORM
#if MLA_SUPPORTED_TRAINING_PLATFORM
using Grpc.Core;
#if UNITY_EDITOR
using UnityEditor;
Expand Down Expand Up @@ -50,6 +50,7 @@ internal class RpcCommunicator : ICommunicator

/// The Unity to External client.
UnityToExternalProto.UnityToExternalProtoClient m_Client;
Channel m_Channel;

/// <summary>
/// Initializes a new instance of the RPCCommunicator class.
Expand Down Expand Up @@ -140,6 +141,7 @@ out input
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}");
}
initParametersOut = new UnityRLInitParameters();
NotifyQuitAndShutDownChannel();
return false;
}

Expand Down Expand Up @@ -181,6 +183,8 @@ out input

UpdateEnvironmentWithInput(input.RlInput);
initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters();
// Be sure to shut down the grpc channel when the application is quitting.
Application.quitting += NotifyQuitAndShutDownChannel;
return true;
#else
initParametersOut = new UnityRLInitParameters();
Expand Down Expand Up @@ -217,9 +221,9 @@ void UpdateEnvironmentWithInput(UnityRLInputProto rlInput)
UnityInputProto Initialize(int port, UnityOutputProto unityOutput, out UnityInputProto unityInput)
{
m_IsOpen = true;
var channel = new Channel($"localhost:{port}", ChannelCredentials.Insecure);
m_Channel = new Channel($"localhost:{port}", ChannelCredentials.Insecure);

m_Client = new UnityToExternalProto.UnityToExternalProtoClient(channel);
m_Client = new UnityToExternalProto.UnityToExternalProtoClient(m_Channel);
var result = m_Client.Exchange(WrapMessage(unityOutput, 200));
var inputMessage = m_Client.Exchange(WrapMessage(null, 200));
unityInput = inputMessage.UnityInput;
Expand All @@ -229,11 +233,24 @@ UnityInputProto Initialize(int port, UnityOutputProto unityOutput, out UnityInpu
if (result.Header.Status != 200 || inputMessage.Header.Status != 200)
{
m_IsOpen = false;
QuitCommandReceived?.Invoke();
NotifyQuitAndShutDownChannel();
}
return result.UnityInput;
}

/// <summary>
/// Raises <c>QuitCommandReceived</c> (if anyone is subscribed) and then shuts down
/// the gRPC channel held in <c>m_Channel</c>, blocking until the shutdown task finishes.
/// </summary>
/// <remarks>
/// Shutdown is best-effort: any exception raised while shutting the channel down is
/// intentionally swallowed so that it cannot interrupt the quit path.
/// </remarks>
void NotifyQuitAndShutDownChannel()
{
    QuitCommandReceived?.Invoke();
    try
    {
        // m_Channel may not have been assigned yet if this runs before a channel was
        // created; skip the shutdown in that case instead of relying on a caught
        // NullReferenceException.
        m_Channel?.ShutdownAsync().Wait();
    }
    catch (Exception)
    {
        // Deliberately ignored: a failed shutdown must not prevent quitting.
    }
}

#endregion

#region Destruction
Expand Down Expand Up @@ -269,7 +286,7 @@ void SendCommandEvent(CommandProto command)
{
case CommandProto.Quit:
{
QuitCommandReceived?.Invoke();
NotifyQuitAndShutDownChannel();
return;
}
case CommandProto.Reset:
Expand Down Expand Up @@ -456,7 +473,7 @@ UnityInputProto Exchange(UnityOutputProto unityOutput)
// Not sure if the quit command is actually sent when a
// non 200 message is received. Notify that we are indeed
// quitting.
QuitCommandReceived?.Invoke();
NotifyQuitAndShutDownChannel();
return message.UnityInput;
}
catch (Exception ex)
Expand Down Expand Up @@ -488,7 +505,7 @@ UnityInputProto Exchange(UnityOutputProto unityOutput)
}

m_IsOpen = false;
QuitCommandReceived?.Invoke();
NotifyQuitAndShutDownChannel();
return null;
}
}
Expand Down
20 changes: 6 additions & 14 deletions ml-agents/tests/yamato/training_int_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
import subprocess
import time
from typing import Any

from .yamato_utils import (
find_executables,
Expand All @@ -14,7 +13,6 @@
run_standalone_build,
init_venv,
override_config_file,
override_legacy_config_file,
checkout_csharp_version,
undo_git_checkout,
)
Expand All @@ -26,7 +24,7 @@ def run_training(python_version: str, csharp_version: str) -> bool:
print(
f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
)
output_dir = "models" if python_version else "results"
output_dir = "results"
onnx_file_expected = f"./{output_dir}/{run_id}/3DBall.onnx"

if os.path.exists(onnx_file_expected):
Expand Down Expand Up @@ -70,17 +68,11 @@ def run_training(python_version: str, csharp_version: str) -> bool:
# Copy the default training config but override the max_steps parameter,
# and reduce the batch_size and buffer_size enough to ensure an update step happens.
yaml_out = "override.yaml"
if python_version:
overrides: Any = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
override_legacy_config_file(
python_version, "config/trainer_config.yaml", yaml_out, **overrides
)
else:
overrides = {
"hyperparameters": {"batch_size": 10, "buffer_size": 10},
"max_steps": 100,
}
override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)
overrides = {
"hyperparameters": {"batch_size": 10, "buffer_size": 10},
"max_steps": 100,
}
override_config_file("config/ppo/3DBall.yaml", yaml_out, overrides)

log_output_path = f"{get_base_output_path()}/training.log"
env_path = os.path.join(get_base_output_path(), standalone_player_path)
Expand Down
3 changes: 0 additions & 3 deletions ml-agents/tests/yamato/yamato_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,10 @@ def init_venv(
pip_commands = ["--upgrade pip", "--upgrade setuptools"]
if mlagents_python_version:
# install from pypi
if platform != "darwin":
raise RuntimeError("Yamato can only run tensorflow on mac platforms!")
pip_commands += [
f"mlagents=={mlagents_python_version}",
f"gym-unity=={mlagents_python_version}",
# TODO build these and publish to internal pypi
"~/tensorflow_pkg/tensorflow-2.0.0-cp37-cp37m-macosx_10_14_x86_64.whl",
"tf2onnx==1.6.1",
]
else:
Expand Down