def wrap_async_call(fct: Awaitable):
    """Runs the awaitable `fct` to completion from synchronous code and
    returns its result.

    The event loop of the calling thread is reused when there is one. When
    `asyncio.get_event_loop()` raises `RuntimeError` because the thread has no
    loop, a new loop is created and installed for that thread instead of
    letting the call fail.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # no loop is set for this thread: create one and keep it as the
        # thread's loop so that later calls reuse it
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(fct)


def is_gpu_node() -> bool:
    """Returns True if this node has support to GPU.

    The check asks the docker engine to run `nvidia-smi` in a
    `nvidia/cuda:10.0-base` container. If the container can be run the node is
    reported as GPU capable; any `aiodocker.exceptions.DockerError` raised
    while doing so is reported as "no GPU support".
    """

    async def async_is_gpu_node() -> bool:
        docker = aiodocker.Docker()

        config = {
            "Cmd": "nvidia-smi",
            "Image": "nvidia/cuda:10.0-base",
            "AttachStdin": False,
            "AttachStdout": False,
            "AttachStderr": False,
            "Tty": False,
            "OpenStdin": False,
            # Have the engine remove the probe container by itself. The
            # container name below is fixed per process, so a left-over
            # container would make the next probe from the same process fail
            # on a name conflict and wrongly report "no GPU".
            "HostConfig": {"AutoRemove": True},
        }
        try:
            await docker.containers.run(
                config=config, name=f"sidecar_{os.getpid()}_test_gpu"
            )
            return True
        except aiodocker.exceptions.DockerError as err:
            # best-effort probe: a failure means "no GPU", but keep the reason
            # visible instead of dropping it silently
            logger.debug("GPU probe container could not be run: %s", err)
            return False
        finally:
            # always release the client session, whatever the outcome
            await docker.close()

    return wrap_async_call(async_is_gpu_node())
def _toggle_gpu_mock(mocker, has_gpu: bool) -> None:
    """Patches `aiodocker.containers.DockerContainers.run` for the GPU check.

    The patched `run` returns an already resolved future. When `has_gpu` is
    False it is additionally set up to raise a mocked `DockerError`.
    """
    run_result = asyncio.Future()
    run_result.set_result("")
    mocked_run = mocker.patch(
        "aiodocker.containers.DockerContainers.run", return_value=run_result
    )
    if has_gpu:
        return
    # no GPU: make the patched call raise instead of returning
    mocked_run.side_effect = aiodocker.exceptions.DockerError(
        "MOCK Error", {"message": "this is a mocked exception"}
    )