diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index f6198d0d735..fbb362d2976 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -1919,7 +1919,7 @@ jobs: path: codeclimate.${{ github.job }}_coverage.json integration-test-director-v2-02: - timeout-minutes: 20 # if this timeout gets too small, then split the tests + timeout-minutes: 30 # if this timeout gets too small, then split the tests name: "[int] director-v2 02" needs: [build-test-images] runs-on: ${{ matrix.os }} diff --git a/api/specs/webserver/openapi-projects.yaml b/api/specs/webserver/openapi-projects.yaml index 0b2811e5629..66743d7840d 100644 --- a/api/specs/webserver/openapi-projects.yaml +++ b/api/specs/webserver/openapi-projects.yaml @@ -395,6 +395,55 @@ paths: default: $ref: "./openapi.yaml#/components/responses/DefaultErrorResponse" + /projects/{project_id}/nodes/{node_id}/retrieve: + parameters: + - name: project_id + in: path + required: true + schema: + type: string + - name: node_id + in: path + required: true + schema: + type: string + + post: + tags: + - project + description: Triggers service retrieve + operationId: post_retrieve + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + port_keys: + description: list of por keys to be retrieved + type: array + items: + type: string + responses: + "200": + description: Returns the amount of transferred bytes when pulling data via nodeports + content: + application/json: + schema: + type: object + properties: + data: + type: object + description: response payload + properties: + size_bytes: + type: integer + description: amount of transferred bytes + + default: + $ref: "#/components/responses/DefaultErrorResponse" + /projects/{study_uuid}/tags/{tag_id}: parameters: - name: tag_id diff --git a/api/specs/webserver/openapi.yaml b/api/specs/webserver/openapi.yaml index 9c8259857d9..72a92f31011 100644 --- 
a/api/specs/webserver/openapi.yaml +++ b/api/specs/webserver/openapi.yaml @@ -200,6 +200,9 @@ paths: /projects/{project_id}/nodes/{node_id}: $ref: "./openapi-projects.yaml#/paths/~1projects~1{project_id}~1nodes~1{node_id}" + /projects/{project_id}/nodes/{node_id}:retrieve: + $ref: "./openapi-projects.yaml#/paths/~1projects~1{project_id}~1nodes~1{node_id}~1retrieve" + /nodes/{nodeInstanceUUID}/outputUi/{outputKey}: $ref: "./openapi-node-v0.0.1.yaml#/paths/~1nodes~1{nodeInstanceUUID}~1outputUi~1{outputKey}" diff --git a/ci/github/integration-testing/director-v2.bash b/ci/github/integration-testing/director-v2.bash index 746fe184f68..f36aac91502 100755 --- a/ci/github/integration-testing/director-v2.bash +++ b/ci/github/integration-testing/director-v2.bash @@ -23,6 +23,7 @@ test() { echo "testing in services/director-v2/tests/integration/$1" pytest --cov=simcore_service_director_v2 --durations=10 --cov-append \ --color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \ + -vvv -s --log-cli-level=DEBUG \ -v -m "not travis" "services/director-v2/tests/integration/$1" --log-level=DEBUG } diff --git a/packages/pytest-simcore/src/pytest_simcore/docker_registry.py b/packages/pytest-simcore/src/pytest_simcore/docker_registry.py index 0ea215fd73a..df140f01399 100644 --- a/packages/pytest-simcore/src/pytest_simcore/docker_registry.py +++ b/packages/pytest-simcore/src/pytest_simcore/docker_registry.py @@ -230,12 +230,14 @@ def jupyter_service(docker_registry: str, node_meta_schema: Dict) -> Dict[str, s ) -DY_STATIC_FILE_SERVER_VERSION = "1.0.5" +@pytest.fixture(scope="session", params=["2.0.2"]) +def dy_static_file_server_version(request): + return request.param @pytest.fixture(scope="session") def dy_static_file_server_service( - docker_registry: str, node_meta_schema: Dict + docker_registry: str, node_meta_schema: Dict, dy_static_file_server_version: str ) -> Dict[str, str]: """ Adds the below service in docker registry @@ -243,7 +245,7 @@ def 
dy_static_file_server_service( """ return _pull_push_service( "itisfoundation/dy-static-file-server", - DY_STATIC_FILE_SERVER_VERSION, + dy_static_file_server_version, docker_registry, node_meta_schema, ) @@ -251,7 +253,7 @@ def dy_static_file_server_service( @pytest.fixture(scope="session") def dy_static_file_server_dynamic_sidecar_service( - docker_registry: str, node_meta_schema: Dict + docker_registry: str, node_meta_schema: Dict, dy_static_file_server_version: str ) -> Dict[str, str]: """ Adds the below service in docker registry @@ -259,7 +261,7 @@ def dy_static_file_server_dynamic_sidecar_service( """ return _pull_push_service( "itisfoundation/dy-static-file-server-dynamic-sidecar", - DY_STATIC_FILE_SERVER_VERSION, + dy_static_file_server_version, docker_registry, node_meta_schema, ) @@ -267,7 +269,7 @@ def dy_static_file_server_dynamic_sidecar_service( @pytest.fixture(scope="session") def dy_static_file_server_dynamic_sidecar_compose_spec_service( - docker_registry: str, node_meta_schema: Dict + docker_registry: str, node_meta_schema: Dict, dy_static_file_server_version: str ) -> Dict[str, str]: """ Adds the below service in docker registry @@ -275,7 +277,7 @@ def dy_static_file_server_dynamic_sidecar_compose_spec_service( """ return _pull_push_service( "itisfoundation/dy-static-file-server-dynamic-sidecar-compose-spec", - DY_STATIC_FILE_SERVER_VERSION, + dy_static_file_server_version, docker_registry, node_meta_schema, ) diff --git a/packages/service-library/src/servicelib/async_utils.py b/packages/service-library/src/servicelib/async_utils.py index 6e2a0db5a3c..05045dc8378 100644 --- a/packages/service-library/src/servicelib/async_utils.py +++ b/packages/service-library/src/servicelib/async_utils.py @@ -2,12 +2,25 @@ import logging from collections import deque from functools import wraps -from typing import Dict, List, Optional +from typing import TYPE_CHECKING, Any, Callable, Deque, Dict, List, Optional import attr logger = logging.getLogger(__name__) 
+if TYPE_CHECKING: + Queue = asyncio.Queue +else: + + class FakeGenericMeta(type): + def __getitem__(self, item): + return self + + class Queue( + asyncio.Queue, metaclass=FakeGenericMeta + ): # pylint: disable=function-redefined + pass + @attr.s(auto_attribs=True) class Context: @@ -30,7 +43,9 @@ async def stop_sequential_workers() -> None: logger.info("All run_sequentially_in_context pending workers stopped") -def run_sequentially_in_context(target_args: List[str] = None): +def run_sequentially_in_context( + target_args: List[str] = None, +) -> Callable[[Any], Any]: """All request to function with same calling context will be run sequentially. Example: @@ -68,15 +83,17 @@ async def func(param1, param2, param3): """ target_args = [] if target_args is None else target_args - def internal(decorated_function): - def get_context(args, kwargs: Dict) -> Context: + def internal( + decorated_function: Callable[[Any], Optional[Any]] + ) -> Callable[[Any], Optional[Any]]: + def get_context(args: Any, kwargs: Dict[Any, Any]) -> Context: arg_names = decorated_function.__code__.co_varnames[ : decorated_function.__code__.co_argcount ] search_args = dict(zip(arg_names, args)) search_args.update(kwargs) - key_parts = deque() + key_parts: Deque[str] = deque() for arg in target_args: sub_args = arg.split(".") main_arg = sub_args[0] @@ -108,13 +125,13 @@ def get_context(args, kwargs: Dict) -> Context: return _sequential_jobs_contexts[key] @wraps(decorated_function) - async def wrapper(*args, **kwargs): + async def wrapper(*args: Any, **kwargs: Any) -> Any: context: Context = get_context(args, kwargs) if not context.initialized: context.initialized = True - async def worker(in_q: asyncio.Queue, out_q: asyncio.Queue): + async def worker(in_q: Queue, out_q: Queue) -> None: while True: awaitable = await in_q.get() in_q.task_done() @@ -137,7 +154,7 @@ async def worker(in_q: asyncio.Queue, out_q: asyncio.Queue): worker(context.in_queue, context.out_queue) ) - await 
context.in_queue.put(decorated_function(*args, **kwargs)) + await context.in_queue.put(decorated_function(*args, **kwargs)) # type: ignore wrapped_result = await context.out_queue.get() if isinstance(wrapped_result, Exception): diff --git a/packages/service-library/src/servicelib/pools.py b/packages/service-library/src/servicelib/pools.py index 11b03178f91..a67e33aef5e 100644 --- a/packages/service-library/src/servicelib/pools.py +++ b/packages/service-library/src/servicelib/pools.py @@ -1,5 +1,7 @@ +import asyncio from concurrent.futures import ProcessPoolExecutor from contextlib import contextmanager +from typing import Any, Callable # only gets created on use and is guaranteed to be the s # ame for the entire lifetime of the application @@ -35,3 +37,10 @@ def non_blocking_process_pool_executor(**kwargs) -> ProcessPoolExecutor: # FIXME: uncomment below line when the issue is fixed # executor.shutdown(wait=False) pass + + +async def async_on_threadpool(callable_function: Callable, *args: Any) -> Any: + """Ensures blocking operation runs on shared thread pool""" + return await asyncio.get_event_loop().run_in_executor( + None, callable_function, *args + ) diff --git a/packages/service-library/src/servicelib/utils.py b/packages/service-library/src/servicelib/utils.py index 06599816792..2f993135197 100644 --- a/packages/service-library/src/servicelib/utils.py +++ b/packages/service-library/src/servicelib/utils.py @@ -7,7 +7,6 @@ import asyncio import logging import os - from pathlib import Path from typing import Any, Awaitable, Coroutine, List, Optional, Union @@ -79,8 +78,11 @@ def log_exception_callback(fut: asyncio.Future): # // tasks async def logged_gather( - *tasks, reraise: bool = True, log: logging.Logger = logger, max_concurrency: int = 0 -) -> List[Any]: + *tasks: Awaitable[Any], + reraise: bool = True, + log: logging.Logger = logger, + max_concurrency: int = 0, +) -> List[Optional[Any]]: """ Thin wrapper around asyncio.gather that allows excuting ALL tasks 
concurently until the end even if any of them fail. Finally, all errors are logged and the first raised (if reraise=True) @@ -91,18 +93,15 @@ async def logged_gather( use directly asyncio.gather(*tasks, return_exceptions=True). :param reraise: reraises first exception (in order the tasks were passed) concurrent tasks, defaults to True - :type reraise: bool, optional :param log: passing the logger gives a chance to identify the origin of the gather call, defaults to current submodule's logger - :type log: logging.Logger, optional :return: list of tasks results and errors e.g. [1, 2, ValueError("task3 went wrong"), 33, "foo"] - :rtype: List[Any] """ wrapped_tasks = tasks if max_concurrency > 0: semaphore = asyncio.Semaphore(max_concurrency) - async def sem_task(task): + async def sem_task(task: Awaitable[Any]) -> Any: async with semaphore: return await task diff --git a/packages/service-library/tests/test_pools.py b/packages/service-library/tests/test_pools.py index f3e80917b94..6c91dd7ed7f 100644 --- a/packages/service-library/tests/test_pools.py +++ b/packages/service-library/tests/test_pools.py @@ -1,8 +1,7 @@ from asyncio import BaseEventLoop from concurrent.futures import ProcessPoolExecutor - -from servicelib.pools import non_blocking_process_pool_executor +from servicelib.pools import async_on_threadpool, non_blocking_process_pool_executor def return_int_one() -> int: @@ -32,3 +31,7 @@ async def test_different_pool_instances() -> None: max_workers=1 ) as first, non_blocking_process_pool_executor() as second: assert first != second + + +async def test_run_on_thread_pool() -> None: + assert await async_on_threadpool(return_int_one) == 1 diff --git a/packages/simcore-sdk/requirements/_test.in b/packages/simcore-sdk/requirements/_test.in index cfb9607f2dc..c6bd1e2dfc4 100644 --- a/packages/simcore-sdk/requirements/_test.in +++ b/packages/simcore-sdk/requirements/_test.in @@ -18,6 +18,7 @@ pytest-mock pytest-runner pytest-sugar pytest-xdist +pytest-lazy-fixture # 
mockups/fixtures alembic diff --git a/packages/simcore-sdk/requirements/_test.txt b/packages/simcore-sdk/requirements/_test.txt index a339e40bcf6..c6edc7ecb51 100644 --- a/packages/simcore-sdk/requirements/_test.txt +++ b/packages/simcore-sdk/requirements/_test.txt @@ -131,6 +131,7 @@ pytest==6.2.5 # pytest-forked # pytest-icdiff # pytest-instafail + # pytest-lazy-fixture # pytest-mock # pytest-sugar # pytest-xdist @@ -144,6 +145,8 @@ pytest-icdiff==0.5 # via -r requirements/_test.in pytest-instafail==0.4.2 # via -r requirements/_test.in +pytest-lazy-fixture==0.6.3 + # via -r requirements/_test.in pytest-mock==3.6.1 # via -r requirements/_test.in pytest-runner==5.3.1 diff --git a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py index b811d3e0e99..c4d1c90f577 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py @@ -66,36 +66,50 @@ async def push( return await _push_file(user_id, project_id, node_uuid, archive_file_path, None) -async def _pull_file(user_id: int, project_id: str, node_uuid: str, file_path: Path): +async def _pull_file( + user_id: int, + project_id: str, + node_uuid: str, + file_path: Path, + save_to: Optional[Path] = None, +): + destination_path = file_path if save_to is None else save_to s3_object = _create_s3_object(project_id, node_uuid, file_path) log.info("pulling data from %s to %s...", s3_object, file_path) downloaded_file = await filemanager.download_file_from_s3( user_id=user_id, store_id="0", s3_object=s3_object, - local_folder=file_path.parent, + local_folder=destination_path.parent, ) - if downloaded_file != file_path: - if file_path.exists(): - file_path.unlink() - move(f"{downloaded_file}", file_path) - log.info("%s successfuly pulled", file_path) + if downloaded_file != destination_path: + destination_path.unlink(missing_ok=True) + move(f"{downloaded_file}", 
destination_path) + log.info("%s successfuly pulled", destination_path) def _get_archive_name(path: Path) -> str: return f"{path.stem}.zip" -async def pull(user_id: int, project_id: str, node_uuid: str, file_or_folder: Path): +async def pull( + user_id: int, + project_id: str, + node_uuid: str, + file_or_folder: Path, + save_to: Optional[Path] = None, +): if file_or_folder.is_file(): - return await _pull_file(user_id, project_id, node_uuid, file_or_folder) + return await _pull_file(user_id, project_id, node_uuid, file_or_folder, save_to) # we have a folder, so we need somewhere to extract it to with TemporaryDirectory() as tmp_dir_name: archive_file = Path(tmp_dir_name) / _get_archive_name(file_or_folder) await _pull_file(user_id, project_id, node_uuid, archive_file) log.info("extracting data from %s", archive_file) + + destination_folder = file_or_folder if save_to is None else save_to await unarchive_dir( - archive_to_extract=archive_file, destination_folder=file_or_folder + archive_to_extract=archive_file, destination_folder=destination_folder ) log.info("extraction completed") diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/dbmanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/dbmanager.py index 848cd89c78a..d655470988c 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/dbmanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/dbmanager.py @@ -101,7 +101,11 @@ def __init__(self, db_engine: Optional[aiopg.sa.Engine] = None): async def write_ports_configuration( self, json_configuration: str, project_id: str, node_uuid: str ): - log.debug("Writing ports configuration to database") + message = ( + f"Writing port configuration to database for " + f"project={project_id} node={node_uuid}: {json_configuration}" + ) + log.debug(message) node_configuration = json.loads(json_configuration) async with DBContextManager(self._db_engine) as engine: diff --git 
a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py index fb89ccee591..9f8ac86a58d 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py @@ -43,6 +43,8 @@ async def load( project_id, node_uuid ) port_cfg = json.loads(port_config_str) + + log.debug(f"{port_cfg=}") # pylint: disable=logging-fstring-interpolation if any(k not in port_cfg for k in NODE_REQUIRED_KEYS): raise InvalidProtocolError( port_cfg, "nodeport in comp_task does not follow protocol" diff --git a/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py b/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py new file mode 100644 index 00000000000..9ac5179876f --- /dev/null +++ b/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py @@ -0,0 +1,201 @@ +# pylint:disable=unused-variable +# pylint:disable=unused-argument +# pylint:disable=redefined-outer-name +# pylint:disable=too-many-arguments + +import hashlib +import os +from asyncio import BaseEventLoop +from pathlib import Path +from typing import Callable, Set, Tuple +from uuid import uuid4 + +import pytest +from simcore_sdk.node_data import data_manager + +pytest_simcore_core_services_selection = ["postgres", "storage"] + +pytest_simcore_ops_services_selection = ["minio", "adminer"] + + +# UTILS + + +def _remove_file_or_folder(file_or_folder: Path) -> None: + if file_or_folder.is_file(): + file_or_folder.unlink() + assert file_or_folder.exists() is False + file_or_folder.touch() + assert file_or_folder.exists() is True + else: + os.system(f"rm -rf {file_or_folder}") + assert file_or_folder.exists() is False + file_or_folder.mkdir(parents=True, exist_ok=True) + assert file_or_folder.exists() is True + + +def _get_file_hashes_in_path(path_to_hash: Path) -> Set[Tuple[Path, str]]: + def _hash_path(path: Path): 
+ sha256_hash = hashlib.sha256() + with open(path, "rb") as f: + # Read and update hash string value in blocks of 4K + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + + def _relative_path(root_path: Path, full_path: Path) -> Path: + return full_path.relative_to(root_path) + + if path_to_hash.is_file(): + return {(_relative_path(path_to_hash, path_to_hash), _hash_path(path_to_hash))} + + return { + (_relative_path(path_to_hash, path), _hash_path(path)) + for path in path_to_hash.rglob("*") + } + + +def _make_file_with_content(file_path: Path) -> Path: + content = " ".join(f"{uuid4()}" for x in range(10)) + file_path.write_text(content) + assert file_path.exists() + return file_path + + +def _make_dir_with_files(temp_dir: Path, file_count: int) -> Path: + assert file_count > 0 + + content_dir_path = temp_dir / f"content_dir{uuid4()}" + content_dir_path.mkdir(parents=True, exist_ok=True) + + for _ in range(file_count): + _make_file_with_content(file_path=content_dir_path / f"{uuid4()}_test.txt") + + return content_dir_path + + +# FIXTURES + + +@pytest.fixture +def node_uuid() -> str: + return f"{uuid4()}" + + +@pytest.fixture +def temp_dir(tmpdir: Path) -> Path: + return Path(tmpdir) + + +@pytest.fixture +def random_tmp_dir_generator(temp_dir: Path) -> Callable[[bool], Path]: + def generator(is_file: bool) -> Path: + random_dir_path = temp_dir / f"{uuid4()}" + random_dir_path.mkdir(parents=True, exist_ok=True) + if is_file: + file_path = random_dir_path / f"{uuid4()}_test.txt" + file_path.touch() + return file_path + + return random_dir_path + + return generator + + +@pytest.fixture +def file_content_path(temp_dir: Path) -> Path: + return _make_file_with_content(file_path=temp_dir / f"{uuid4()}_test.txt") + + +@pytest.fixture +def dir_content_one_file_path(temp_dir: Path) -> Path: + return _make_dir_with_files(temp_dir, file_count=1) + + +@pytest.fixture +def 
dir_content_multiple_files_path(temp_dir: Path) -> Path: + return _make_dir_with_files(temp_dir, file_count=2) + + +@pytest.mark.parametrize( + "content_path", + [ + # pylint: disable=no-member + pytest.lazy_fixture("file_content_path"), + pytest.lazy_fixture("dir_content_one_file_path"), + pytest.lazy_fixture("dir_content_multiple_files_path"), + ], +) +async def test_valid_upload_download( + loop: BaseEventLoop, + filemanager_cfg: None, + content_path: Path, + user_id: int, + project_id: str, + node_uuid: str, +): + await data_manager.push( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + file_or_folder=content_path, + ) + + uploaded_hashes = _get_file_hashes_in_path(content_path) + + _remove_file_or_folder(content_path) + + await data_manager.pull( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + file_or_folder=content_path, + ) + + downloaded_hashes = _get_file_hashes_in_path(content_path) + + assert uploaded_hashes == downloaded_hashes + + +@pytest.mark.parametrize( + "content_path", + [ + # pylint: disable=no-member + pytest.lazy_fixture("file_content_path"), + pytest.lazy_fixture("dir_content_one_file_path"), + pytest.lazy_fixture("dir_content_multiple_files_path"), + ], +) +async def test_valid_upload_download_saved_to( + loop: BaseEventLoop, + filemanager_cfg: None, + content_path: Path, + user_id: int, + project_id: str, + node_uuid: str, + random_tmp_dir_generator: Callable, +): + await data_manager.push( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + file_or_folder=content_path, + ) + + uploaded_hashes = _get_file_hashes_in_path(content_path) + + _remove_file_or_folder(content_path) + + new_destination = random_tmp_dir_generator(is_file=content_path.is_file()) + + await data_manager.pull( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + file_or_folder=content_path, + save_to=new_destination, + ) + + downloaded_hashes = _get_file_hashes_in_path(new_destination) + + assert 
uploaded_hashes == downloaded_hashes diff --git a/services/api-server/Dockerfile b/services/api-server/Dockerfile index 524ea060378..979dfbd8cfc 100644 --- a/services/api-server/Dockerfile +++ b/services/api-server/Dockerfile @@ -10,10 +10,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=pcrespov -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/catalog/Dockerfile b/services/catalog/Dockerfile index 7c4e3527190..398f5cf80e9 100644 --- a/services/catalog/Dockerfile +++ b/services/catalog/Dockerfile @@ -11,12 +11,12 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=pcrespov -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ - # verify that the binary works - gosu nobody true +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ + # verify that the binary works + gosu nobody true # simcore-user uid=8004(scu) gid=8004(scu) groups=8004(scu) ENV SC_USER_ID=8004 \ diff --git a/services/datcore-adapter/Dockerfile b/services/datcore-adapter/Dockerfile index c65593aea92..7156c6afe50 100644 --- a/services/datcore-adapter/Dockerfile +++ b/services/datcore-adapter/Dockerfile @@ -13,10 +13,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=sanderegg -RUN set -eux; \ - apt-get update; \ - apt-get install -y --no-install-recommends gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y --no-install-recommends gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/director-v2/Dockerfile b/services/director-v2/Dockerfile index 04143aca705..da72af082ad 100644 --- 
a/services/director-v2/Dockerfile +++ b/services/director-v2/Dockerfile @@ -11,10 +11,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=pcrespov -RUN set -eux; \ - apt-get update; \ - apt-get install -y --no-install-recommends gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y --no-install-recommends gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/director-v2/openapi.json b/services/director-v2/openapi.json index a3967b35f1f..3c469e567f5 100644 --- a/services/director-v2/openapi.json +++ b/services/director-v2/openapi.json @@ -794,7 +794,7 @@ "type": "boolean", "default": true }, - "name": "save_state", + "name": "can_save", "in": "query" } ], diff --git a/services/director-v2/requirements/_test.in b/services/director-v2/requirements/_test.in index f3c4455af54..047436c50a7 100644 --- a/services/director-v2/requirements/_test.in +++ b/services/director-v2/requirements/_test.in @@ -23,6 +23,7 @@ pytest-icdiff pytest-lazy-fixture async-asgi-testclient nest-asyncio # required for async tests and mixing starlette test_client +minio # fixtures Faker diff --git a/services/director-v2/requirements/_test.txt b/services/director-v2/requirements/_test.txt index 487b58c8d55..ece4f37c002 100644 --- a/services/director-v2/requirements/_test.txt +++ b/services/director-v2/requirements/_test.txt @@ -61,6 +61,7 @@ certifi==2021.5.30 # via # -c requirements/_base.txt # httpx + # minio # requests cffi==1.14.6 # via @@ -178,6 +179,10 @@ markupsafe==1.1.1 # mako mccabe==0.6.1 # via pylint +minio==7.0.4 + # via + # -c requirements/../../../requirements/constraints.txt + # -r requirements/_test.in multidict==5.1.0 # via # -c requirements/_base.txt @@ -323,6 +328,7 @@ urllib3==1.26.6 # via # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt + # minio # requests vine==5.0.0 # via diff --git 
a/services/director-v2/requirements/ci.txt b/services/director-v2/requirements/ci.txt index 6af10b1bfbe..4c3787abaca 100644 --- a/services/director-v2/requirements/ci.txt +++ b/services/director-v2/requirements/ci.txt @@ -17,6 +17,7 @@ ../../packages/pytest-simcore/ ../../packages/service-library[fastapi] ../../packages/settings-library/ +../../packages/simcore-sdk # Needed ONLY for testing # installs current package . diff --git a/services/director-v2/requirements/dev.txt b/services/director-v2/requirements/dev.txt index 90b3559139f..5f79af98be5 100644 --- a/services/director-v2/requirements/dev.txt +++ b/services/director-v2/requirements/dev.txt @@ -17,6 +17,7 @@ --editable ../../packages/pytest-simcore/ --editable ../../packages/service-library[fastapi] --editable ../../packages/settings-library/ +--editable ../../packages/simcore-sdk # Needed ONLY for testing # installs current package --editable . diff --git a/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py b/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py index 421e8182d0b..0260d0c46a1 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py +++ b/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py @@ -4,7 +4,7 @@ from uuid import UUID import httpx -from fastapi import APIRouter, Depends, Header +from fastapi import APIRouter, Depends, Header, Request from fastapi.responses import RedirectResponse from models_library.projects import ProjectID from models_library.projects_nodes import NodeID @@ -13,7 +13,7 @@ from starlette import status from starlette.datastructures import URL -from ...core.settings import DynamicServicesSettings +from ...core.settings import DynamicServicesSettings, DynamicSidecarSettings from ...models.domains.dynamic_services import ( DynamicServiceCreate, DynamicServiceOut, @@ -163,18 +163,18 @@ async def get_dynamic_sidecar_status( ) async def 
stop_dynamic_service( node_uuid: NodeID, - save_state: Optional[bool] = True, + can_save: Optional[bool] = True, director_v0_client: DirectorV0Client = Depends(get_director_v0_client), scheduler: DynamicSidecarsScheduler = Depends(get_scheduler), ) -> Union[NoContentResponse, RedirectResponse]: try: - await scheduler.remove_service(node_uuid, save_state) + await scheduler.mark_service_for_removal(node_uuid, can_save) except DynamicSidecarNotFoundError: # legacy service? if it's not then a 404 will anyway be received # forward to director-v0 redirection_url = director_v0_client.client.base_url.copy_with( path=f"/v0/running_interactive_services/{node_uuid}", - params={"save_state": bool(save_state)}, + params={"can_save": bool(can_save)}, ) return RedirectResponse(str(redirection_url)) @@ -190,18 +190,39 @@ async def stop_dynamic_service( ) @log_decorator(logger=logger) async def service_retrieve_data_on_ports( + request: Request, + node_uuid: NodeID, retrieve_settings: RetrieveDataIn, - service_base_url: URL = Depends(get_service_base_url), - services_client: ServicesClient = Depends(get_services_client), -): - # the handling of client/server errors is already encapsulated in the call to request - resp = await services_client.request( - "POST", - f"{service_base_url}/retrieve", - data=retrieve_settings.json(by_alias=True), - timeout=httpx.Timeout( - 5.0, read=60 * 60.0 - ), # this call waits for the service to download data - ) - # validate and return - return RetrieveDataOutEnveloped.parse_obj(resp.json()) + scheduler: DynamicSidecarsScheduler = Depends(get_scheduler), +) -> RetrieveDataOutEnveloped: + try: + return await scheduler.retrieve_service_inputs( + node_uuid, retrieve_settings.port_keys + ) + except DynamicSidecarNotFoundError: + # in case of legacy service, no redirect will be used + # makes request to director-v0 and sends back reply + + service_base_url: URL = await get_service_base_url( + node_uuid, get_director_v0_client(request) + ) + 
services_client: ServicesClient = get_services_client(request) + + dynamic_sidecar_settings: DynamicSidecarSettings = ( + request.app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR + ) + timeout = httpx.Timeout( + dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT, + connect=dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_CONNECT_TIMEOUT, + ) + + # this call waits for the service to download data + response = await services_client.request( + "POST", + f"{service_base_url}/retrieve", + data=retrieve_settings.json(by_alias=True), + timeout=timeout, + ) + + # validate and return + return RetrieveDataOutEnveloped.parse_obj(response.json()) diff --git a/services/director-v2/src/simcore_service_director_v2/core/settings.py b/services/director-v2/src/simcore_service_director_v2/core/settings.py index 6374e149d2b..fb3b08e9ec1 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/settings.py +++ b/services/director-v2/src/simcore_service_director_v2/core/settings.py @@ -98,6 +98,10 @@ class DynamicSidecarSettings(BaseCustomSettings): False, description="exposes the service on localhost for debuging and testing", ) + PROXY_EXPOSE_PORT: bool = Field( + False, + description="exposes the proxy on localhost for debuging and testing", + ) SIMCORE_SERVICES_NETWORK_NAME: str = Field( ..., @@ -126,8 +130,16 @@ class DynamicSidecarSettings(BaseCustomSettings): "twards the dynamic-sidecar, as is the case with the above timeout field." ), ) + DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT: PositiveFloat = Field( + 60 * MINS, + description=( + "When saving and restoring the state of a dynamic service, depending on the payload " + "some services take longer or shorter to save and restore. Across the " + "platform this value is set to 1 hour." 
+ ), + ) DYNAMIC_SIDECAR_WAIT_FOR_CONTAINERS_TO_START: PositiveFloat = Field( - 60.0 * 60.0, + 60.0 * MINS, description=( "After running `docker-compose up`, images might need to be pulled " "before everything is started. Using default 1hour timeout to let this " diff --git a/services/director-v2/src/simcore_service_director_v2/models/domains/dynamic_services.py b/services/director-v2/src/simcore_service_director_v2/models/domains/dynamic_services.py index 6eb570061d9..2b9ba0cd963 100644 --- a/services/director-v2/src/simcore_service_director_v2/models/domains/dynamic_services.py +++ b/services/director-v2/src/simcore_service_director_v2/models/domains/dynamic_services.py @@ -1,7 +1,7 @@ from typing import List from models_library.services import PropertyName -from pydantic import BaseModel, Field +from pydantic import BaseModel, ByteSize, Field from ..schemas.dynamic_services import RunningDynamicServiceDetails, ServiceDetails @@ -13,7 +13,7 @@ class RetrieveDataIn(BaseModel): class RetrieveDataOut(BaseModel): - size_bytes: int = Field( + size_bytes: ByteSize = Field( ..., description="The amount of data transferred by the retrieve call" ) @@ -21,6 +21,15 @@ class RetrieveDataOut(BaseModel): class RetrieveDataOutEnveloped(BaseModel): data: RetrieveDataOut + @classmethod + def from_transferred_bytes( + cls, transferred_bytes: int + ) -> "RetrieveDataOutEnveloped": + return cls(data=RetrieveDataOut(size_bytes=transferred_bytes)) + + class Config: + schema_extra = {"examples": [{"data": {"size_bytes": 42}}]} + DynamicServiceCreate = ServiceDetails DynamicServiceOut = RunningDynamicServiceDetails diff --git a/services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py b/services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py index 3ddf7586564..c8779b7c9ca 100644 --- a/services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py +++ 
b/services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py @@ -107,6 +107,32 @@ def from_container(cls, container: Dict[str, Any]) -> "DockerContainerInspect": ) +class ServiceRemovalState(BaseModel): + can_remove: bool = Field( + False, + description="when True, marks the service as ready to be removed", + ) + can_save: Optional[bool] = Field( + None, + description="when True, saves the internal state and upload outputs of the service", + ) + was_removed: bool = Field( + False, + description=( + "Will be True when the removal finished. Used primarily " + "to cancel retrying long running operations." + ), + ) + + def mark_to_remove(self, can_save: Optional[bool]) -> None: + self.can_remove = True + self.can_save = can_save + + def mark_removed(self) -> None: + self.can_remove = False + self.was_removed = True + + class DynamicSidecar(BaseModel): status: Status = Field( Status.create_as_initially_ok(), @@ -153,6 +179,24 @@ def compose_spec_submitted(self) -> bool: ), ) + service_environment_prepared: bool = Field( + False, + description=( + "True when the environment setup required by the " + "dynamic-sidecars created services was completed." + "Example: nodeports data downloaded, globally " + "shared service data fetched, etc.." 
+ ), + ) + + service_removal_state: ServiceRemovalState = Field( + default_factory=ServiceRemovalState, + description=( + "stores information used during service removal " + "from the dynamic-sidecar scheduler" + ), + ) + # below had already been validated and # used only to start the proxy dynamic_sidecar_id: Optional[ServiceId] = Field( diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py index 19e044640cb..dcb68b943e2 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py @@ -1,7 +1,7 @@ import json import logging import traceback -from typing import Any, Dict +from typing import Any, Dict, List, Optional import httpx from fastapi import FastAPI @@ -60,6 +60,10 @@ def __init__(self, app: FastAPI): dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_REQUEST_TIMEOUT, connect=dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_CONNECT_TIMEOUT, ) + self._save_restore_timeout = httpx.Timeout( + dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT, + connect=dynamic_sidecar_settings.DYNAMIC_SIDECAR_API_CONNECT_TIMEOUT, + ) async def is_healthy(self, dynamic_sidecar_endpoint: str) -> bool: """returns True if service is UP and running else False""" @@ -126,7 +130,7 @@ async def start_service_creation( try: async with httpx.AsyncClient(timeout=self._base_timeout) as client: response = await client.post(url, data=compose_spec) - if response.status_code != 202: + if response.status_code != status.HTTP_202_ACCEPTED: message = ( f"ERROR during service creation request: " f"status={response.status_code}, body={response.text}" @@ -159,6 +163,77 @@ async def begin_service_destruction(self, dynamic_sidecar_endpoint: str) -> None log_httpx_http_error(url, "POST", traceback.format_exc()) 
raise e + async def service_save_state(self, dynamic_sidecar_endpoint: str) -> None: + url = get_url(dynamic_sidecar_endpoint, "/v1/containers/state:save") + try: + async with httpx.AsyncClient(timeout=self._save_restore_timeout) as client: + response = await client.post(url) + if response.status_code != status.HTTP_204_NO_CONTENT: + message = ( + f"ERROR while saving service state: " + f"status={response.status_code}, body={response.text}" + ) + logging.warning(message) + raise DynamicSchedulerException(message) + except httpx.HTTPError as e: + log_httpx_http_error(url, "PUT", traceback.format_exc()) + raise e + + async def service_restore_state(self, dynamic_sidecar_endpoint: str) -> None: + url = get_url(dynamic_sidecar_endpoint, "/v1/containers/state:restore") + try: + async with httpx.AsyncClient(timeout=self._save_restore_timeout) as client: + response = await client.post(url) + if response.status_code != status.HTTP_204_NO_CONTENT: + message = ( + f"ERROR while restoring service state: " + f"status={response.status_code}, body={response.text}" + ) + logging.warning(message) + raise DynamicSchedulerException(message) + except httpx.HTTPError as e: + log_httpx_http_error(url, "PUT", traceback.format_exc()) + raise e + + async def service_pull_input_ports( + self, dynamic_sidecar_endpoint: str, port_keys: Optional[List[str]] = None + ) -> int: + port_keys = [] if port_keys is None else port_keys + url = get_url(dynamic_sidecar_endpoint, "/v1/containers/ports/inputs:pull") + try: + async with httpx.AsyncClient(timeout=self._save_restore_timeout) as client: + response = await client.post(url, json=port_keys) + if response.status_code != status.HTTP_200_OK: + message = ( + f"ERROR while restoring service state: " + f"status={response.status_code}, body={response.text}" + ) + logging.warning(message) + raise DynamicSchedulerException(message) + return int(response.text) + except httpx.HTTPError as e: + log_httpx_http_error(url, "PUT", traceback.format_exc()) + 
raise e + + async def service_push_output_ports( + self, dynamic_sidecar_endpoint: str, port_keys: Optional[List[str]] = None + ) -> None: + port_keys = [] if port_keys is None else port_keys + url = get_url(dynamic_sidecar_endpoint, "/v1/containers/ports/outputs:push") + try: + async with httpx.AsyncClient(timeout=self._save_restore_timeout) as client: + response = await client.post(url, json=port_keys) + if response.status_code != status.HTTP_204_NO_CONTENT: + message = ( + f"ERROR while restoring service state: " + f"status={response.status_code}, body={response.text}" + ) + logging.warning(message) + raise DynamicSchedulerException(message) + except httpx.HTTPError as e: + log_httpx_http_error(url, "PUT", traceback.format_exc()) + raise e + async def get_entrypoint_container_name( self, dynamic_sidecar_endpoint: str, dynamic_sidecar_network_name: str ) -> str: diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py index b56b7299deb..a5cef6a0553 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py @@ -1,4 +1,6 @@ # wraps all calls to underlying docker engine + + import asyncio import logging import time @@ -29,6 +31,52 @@ log = logging.getLogger(__name__) +def _monkey_patch_aiodocker() -> None: + """Raises an error once the library is up to date.""" + from distutils.version import LooseVersion + + from aiodocker import volumes + from aiodocker.utils import clean_filters + from aiodocker.volumes import DockerVolume + + if LooseVersion(aiodocker.__version__) > LooseVersion("0.21.0"): + raise RuntimeError( + "Please check that PR https://github.com/aio-libs/aiodocker/pull/623 " + "is not part of the current bump version. 
" + "Otherwise, if the current PR is part of this new release " + "remove monkey_patch." + ) + + # pylint: disable=protected-access + async def _custom_volumes_list(self, *, filters=None): + """ + Return a list of volumes + + Args: + filters: a dict with a list of filters + + Available filters: + dangling= + driver= + label= or label=: + name= + """ + params = {} if filters is None else {"filters": clean_filters(filters)} + + data = await self.docker._query_json("volumes", params=params) + return data + + async def _custom_volumes_get(self, id): # pylint: disable=redefined-builtin + data = await self.docker._query_json("volumes/{id}".format(id=id), method="GET") + return DockerVolume(self.docker, data["Name"]) + + setattr(volumes.DockerVolumes, "list", _custom_volumes_list) + setattr(volumes.DockerVolumes, "get", _custom_volumes_get) + + +_monkey_patch_aiodocker() + + @asynccontextmanager async def docker_client() -> AsyncIterator[aiodocker.docker.Docker]: client = None @@ -322,6 +370,20 @@ async def remove_dynamic_sidecar_network(network_name: str) -> bool: return False +async def remove_dynamic_sidecar_volumes(node_uuid: NodeID) -> bool: + async with docker_client() as client: + volumes_response = await client.volumes.list( + filters={"label": f"uuid={node_uuid}"} + ) + volumes = volumes_response["Volumes"] + for volume_data in volumes: + volume = await client.volumes.get(volume_data["Name"]) + await volume.delete() + + log.debug("Remove volumes: %s", [v["Name"] for v in volumes]) + return True + + async def list_dynamic_sidecar_services( dynamic_sidecar_settings: DynamicSidecarSettings, user_id: Optional[UserID] = None, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py index 0a6afe551ae..cb9598c7f09 100644 --- 
a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py @@ -1,9 +1,10 @@ +import json from copy import deepcopy -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import yaml from fastapi.applications import FastAPI -from models_library.service_settings_labels import ComposeSpecLabel +from models_library.service_settings_labels import ComposeSpecLabel, PathMappingsLabel from ...core.settings import DynamicSidecarSettings from .docker_service_specs import MATCH_SERVICE_VERSION, MATCH_SIMCORE_REGISTRY @@ -33,15 +34,28 @@ def _inject_proxy_network_configuration( } } - # Inject Traefik rules on target container - target_container_spec = service_spec["services"][target_container] - # attach overlay network to container + target_container_spec = service_spec["services"][target_container] container_networks = target_container_spec.get("networks", []) container_networks.append(dynamic_sidecar_network_name) target_container_spec["networks"] = container_networks +def _inject_paths_mappings( + service_spec: Dict[str, Any], path_mappings: PathMappingsLabel +) -> None: + for service_name in service_spec["services"]: + service_content = service_spec["services"][service_name] + + environment_vars: List[str] = service_content.get("environment", []) + environment_vars.append(f"DY_SIDECAR_PATH_INPUTS={path_mappings.inputs_path}") + environment_vars.append(f"DY_SIDECAR_PATH_OUTPUTS={path_mappings.outputs_path}") + str_path_mappings = json.dumps([str(x) for x in path_mappings.state_paths]) + environment_vars.append(f"DY_SIDECAR_STATE_PATHS={str_path_mappings}") + + service_content["environment"] = environment_vars + + def _assemble_from_service_key_and_tag( resolved_registry_url: str, service_key: str, @@ -71,6 +85,7 @@ async def assemble_spec( app: FastAPI, service_key: str, service_tag: str, + 
paths_mapping: PathMappingsLabel, compose_spec: ComposeSpecLabel, container_http_entry: Optional[str], dynamic_sidecar_network_name: str, @@ -103,6 +118,8 @@ async def assemble_spec( dynamic_sidecar_network_name=dynamic_sidecar_network_name, ) + _inject_paths_mappings(service_spec, paths_mapping) + stringified_service_spec = yaml.safe_dump(service_spec) stringified_service_spec = _replace_env_vars_in_compose_spec( stringified_service_spec=stringified_service_spec, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/__init__.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/__init__.py index 9b29fb169db..e431fe6607d 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/__init__.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/__init__.py @@ -1,8 +1,11 @@ -from .spec_dynamic_sidecar import ( +from .settings import ( MATCH_SERVICE_VERSION, MATCH_SIMCORE_REGISTRY, + inject_settings_to_create_service_params, + merge_settings_before_use, +) +from .spec_dynamic_sidecar import ( extract_service_port_from_compose_start_spec, get_dynamic_sidecar_spec, - merge_settings_before_use, ) from .spec_proxy import get_dynamic_proxy_spec diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py new file mode 100644 index 00000000000..c8c436f32e6 --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py @@ -0,0 +1,386 @@ +import json +import logging +from collections import deque +from typing import Any, Deque, Dict, List, cast + +from models_library.service_settings_labels import ( + ComposeSpecLabel, + 
SimcoreServiceLabels, + SimcoreServiceSettingLabelEntry, + SimcoreServiceSettingsLabel, +) +from models_library.services import ServiceKeyVersion + +from ....api.dependencies.director_v0 import DirectorV0Client +from ..errors import DynamicSidecarError + +# Notes on below env var names: +# - SIMCORE_REGISTRY will be replaced by the url of the simcore docker registry +# deployed inside the platform +# - SERVICE_VERSION will be replaced by the version of the service +# to which this compos spec is attached +# Example usage in docker compose: +# image: ${SIMCORE_REGISTRY}/${DOCKER_IMAGE_NAME}-dynamic-sidecar-compose-spec:${SERVICE_VERSION} +MATCH_SERVICE_VERSION = "${SERVICE_VERSION}" +MATCH_SIMCORE_REGISTRY = "${SIMCORE_REGISTRY}" +MATCH_IMAGE_START = f"{MATCH_SIMCORE_REGISTRY}/" +MATCH_IMAGE_END = f":{MATCH_SERVICE_VERSION}" + +log = logging.getLogger(__name__) + + +def _parse_mount_settings(settings: List[Dict]) -> List[Dict]: + mounts = [] + for s in settings: + log.debug("Retrieved mount settings %s", s) + mount = {} + mount["ReadOnly"] = True + if "ReadOnly" in s and s["ReadOnly"] in ["false", "False", False]: + mount["ReadOnly"] = False + + for field in ["Source", "Target", "Type"]: + if field in s: + mount[field] = s[field] + else: + log.warning( + "Mount settings have wrong format. 
Required keys [Source, Target, Type]" + ) + continue + + log.debug("Append mount settings %s", mount) + mounts.append(mount) + + return mounts + + +def _parse_env_settings(settings: List[str]) -> Dict: + envs = {} + for s in settings: + log.debug("Retrieved env settings %s", s) + if "=" in s: + parts = s.split("=") + if len(parts) == 2: + # will be forwarded to dynamic-sidecar spawned containers + envs[f"FORWARD_ENV_{parts[0]}"] = parts[1] + + log.debug("Parsed env settings %s", s) + + return envs + + +# pylint: disable=too-many-branches +def inject_settings_to_create_service_params( + labels_service_settings: SimcoreServiceSettingsLabel, + create_service_params: Dict[str, Any], +) -> None: + for param in labels_service_settings: + param: SimcoreServiceSettingLabelEntry = param + # NOTE: the below capitalize addresses a bug in a lot of already in use services + # where Resources was written in lower case + if param.setting_type.capitalize() == "Resources": + # python-API compatible for backward compatibility + if "mem_limit" in param.value: + create_service_params["task_template"]["Resources"]["Limits"][ + "MemoryBytes" + ] = param.value["mem_limit"] + if "cpu_limit" in param.value: + create_service_params["task_template"]["Resources"]["Limits"][ + "NanoCPUs" + ] = param.value["cpu_limit"] + if "mem_reservation" in param.value: + create_service_params["task_template"]["Resources"]["Reservations"][ + "MemoryBytes" + ] = param.value["mem_reservation"] + if "cpu_reservation" in param.value: + create_service_params["task_template"]["Resources"]["Reservations"][ + "NanoCPUs" + ] = param.value["cpu_reservation"] + # REST-API compatible + if "Limits" in param.value or "Reservations" in param.value: + create_service_params["task_template"]["Resources"].update(param.value) + + # publishing port on the ingress network. 
+ elif param.name == "ports" and param.setting_type == "int": # backward comp + create_service_params["labels"]["port"] = create_service_params["labels"][ + "service_port" + ] = str(param.value) + # REST-API compatible + elif param.setting_type == "EndpointSpec": + if "Ports" in param.value: + if ( + isinstance(param.value["Ports"], list) + and "TargetPort" in param.value["Ports"][0] + ): + create_service_params["labels"]["port"] = create_service_params[ + "labels" + ]["service_port"] = str(param.value["Ports"][0]["TargetPort"]) + + # placement constraints + elif param.name == "constraints": # python-API compatible + create_service_params["task_template"]["Placement"][ + "Constraints" + ] += param.value + elif param.setting_type == "Constraints": # REST-API compatible + create_service_params["task_template"]["Placement"][ + "Constraints" + ] += param.value + elif param.name == "env": + log.debug("Found env parameter %s", param.value) + env_settings = _parse_env_settings(param.value) + if env_settings: + create_service_params["task_template"]["ContainerSpec"]["Env"].update( + env_settings + ) + elif param.name == "mount": + log.debug("Found mount parameter %s", param.value) + mount_settings: List[Dict] = _parse_mount_settings(param.value) + if mount_settings: + create_service_params["task_template"]["ContainerSpec"][ + "Mounts" + ].extend(mount_settings) + + container_spec = create_service_params["task_template"]["ContainerSpec"] + # set labels for CPU and Memory limits + container_spec["Labels"]["nano_cpus_limit"] = str( + create_service_params["task_template"]["Resources"]["Limits"]["NanoCPUs"] + ) + container_spec["Labels"]["mem_limit"] = str( + create_service_params["task_template"]["Resources"]["Limits"]["MemoryBytes"] + ) + + +def _assemble_key(service_key: str, service_tag: str) -> str: + return f"{service_key}:{service_tag}" + + +async def _extract_osparc_involved_service_labels( + director_v0_client: DirectorV0Client, + service_key: str, + service_tag: str, 
+ service_labels: SimcoreServiceLabels, +) -> Dict[str, SimcoreServiceLabels]: + """ + Returns all the involved oSPARC services from the provided service labels. + + If the service contains a compose-spec that will also be parsed for images. + Searches for images like the following in the spec: + - `${REGISTRY_URL}/**SOME_SERVICE_NAME**:${SERVICE_TAG}` + - `${REGISTRY_URL}/**SOME_SERVICE_NAME**:1.2.3` where `1.2.3` is a hardcoded tag + """ + + # initialize with existing labels + # stores labels mapped by image_name service:tag + docker_image_name_by_services: Dict[str, SimcoreServiceLabels] = { + _assemble_key(service_key=service_key, service_tag=service_tag): service_labels + } + compose_spec: ComposeSpecLabel = cast(ComposeSpecLabel, service_labels.compose_spec) + if compose_spec is None: + return docker_image_name_by_services + + # maps form image_name to compose_spec key + reverse_mapping: Dict[str, str] = {} + + compose_spec_services = compose_spec.get("services", {}) + image = None + for compose_service_key, service_data in compose_spec_services.items(): + image = service_data.get("image", None) + if image is None: + continue + + # if image dose not have this format skip: + # - `${SIMCORE_REGISTRY}/**SOME_SERVICE_NAME**:${SERVICE_VERSION}` + # - `${SIMCORE_REGISTRY}/**SOME_SERVICE_NAME**:1.2.3` a hardcoded tag + if not image.startswith(MATCH_IMAGE_START) or ":" not in image: + continue + if not image.startswith(MATCH_IMAGE_START) or not image.endswith( + MATCH_IMAGE_END + ): + continue + + # strips `${REGISTRY_URL}/`; replaces `${SERVICE_TAG}` with `service_tag` + osparc_image_key = image.replace(MATCH_SERVICE_VERSION, service_tag).replace( + MATCH_IMAGE_START, "" + ) + current_service_key, current_service_tag = osparc_image_key.split(":") + involved_key = _assemble_key( + service_key=current_service_key, service_tag=current_service_tag + ) + reverse_mapping[involved_key] = compose_service_key + + simcore_service_labels: SimcoreServiceLabels = ( + await 
director_v0_client.get_service_labels( + service=ServiceKeyVersion( + key=current_service_key, version=current_service_tag + ) + ) + ) + docker_image_name_by_services[involved_key] = simcore_service_labels + + if len(reverse_mapping) != len(docker_image_name_by_services): + message = ( + f"Extracting labels for services in '{image}' could not fill " + f"reverse_mapping={reverse_mapping}; " + f"docker_image_name_by_services={docker_image_name_by_services}" + ) + log.error(message) + raise DynamicSidecarError(message) + + # remaps from image_name as key to compose_spec key + compose_spec_mapped_labels = { + reverse_mapping[k]: v for k, v in docker_image_name_by_services.items() + } + return compose_spec_mapped_labels + + +def _add_compose_destination_container_to_settings_entries( + settings: SimcoreServiceSettingsLabel, destination_container: str +) -> List[SimcoreServiceSettingLabelEntry]: + def _inject_destination_container( + item: SimcoreServiceSettingLabelEntry, + ) -> SimcoreServiceSettingLabelEntry: + # pylint: disable=protected-access + item._destination_container = destination_container + return item + + return [_inject_destination_container(x) for x in settings] + + +def _merge_resources_in_settings( + settings: Deque[SimcoreServiceSettingLabelEntry], +) -> Deque[SimcoreServiceSettingLabelEntry]: + """All oSPARC services which have defined resource requirements will be added""" + result: Deque[SimcoreServiceSettingLabelEntry] = deque() + resources_entries: Deque[SimcoreServiceSettingLabelEntry] = deque() + + log.debug("merging settings %s", settings) + + for entry in settings: + entry: SimcoreServiceSettingLabelEntry = entry + if entry.name == "Resources" and entry.setting_type == "Resources": + resources_entries.append(entry) + else: + result.append(entry) + + if len(resources_entries) <= 1: + return settings + + # merge all resources + empty_resource_entry: SimcoreServiceSettingLabelEntry = ( + SimcoreServiceSettingLabelEntry( + name="Resources", + 
setting_type="Resources", + value={ + "Limits": {"NanoCPUs": 0, "MemoryBytes": 0}, + "Reservations": { + "NanoCPUs": 0, + "MemoryBytes": 0, + "GenericResources": [], + }, + }, + ) + ) + + for resource_entry in resources_entries: + resource_entry: SimcoreServiceSettingLabelEntry = resource_entry + limits = resource_entry.value.get("Limits", {}) + empty_resource_entry.value["Limits"]["NanoCPUs"] += limits.get("NanoCPUs", 0) + empty_resource_entry.value["Limits"]["MemoryBytes"] += limits.get( + "MemoryBytes", 0 + ) + + reservations = resource_entry.value.get("Reservations", {}) + empty_resource_entry.value["Reservations"]["NanoCPUs"] = reservations.get( + "NanoCPUs", 0 + ) + empty_resource_entry.value["Reservations"]["MemoryBytes"] = reservations.get( + "MemoryBytes", 0 + ) + empty_resource_entry.value["Reservations"]["GenericResources"] = [] + # put all generic resources together without looking for duplicates + empty_resource_entry.value["Reservations"]["GenericResources"].extend( + reservations.get("GenericResources", []) + ) + + result.append(empty_resource_entry) + + return result + + +def _patch_target_service_into_env_vars( + settings: Deque[SimcoreServiceSettingLabelEntry], +) -> Deque[SimcoreServiceSettingLabelEntry]: + """NOTE: this method will modify settings in place""" + + def _format_env_var(env_var: str, destination_container: str) -> str: + var_name, var_payload = env_var.split("=") + json_encoded = json.dumps( + dict(destination_container=destination_container, env_var=var_payload) + ) + return f"{var_name}={json_encoded}" + + for entry in settings: + entry: SimcoreServiceSettingLabelEntry = entry + if entry.name == "env" and entry.setting_type == "string": + # process entry + list_of_env_vars = entry.value if entry.value else [] + + # pylint: disable=protected-access + destination_container = entry._destination_container + + # transforms settings defined environment variables + # from `ENV_VAR=PAYLOAD` + # to `ENV_VAR={"destination_container": 
"destination_container", "env_var": "PAYLOAD"}` + entry.value = [ + _format_env_var(x, destination_container) for x in list_of_env_vars + ] + + return settings + + +async def merge_settings_before_use( + director_v0_client: DirectorV0Client, service_key: str, service_tag: str +) -> SimcoreServiceSettingsLabel: + + simcore_service_labels: SimcoreServiceLabels = ( + await director_v0_client.get_service_labels( + service=ServiceKeyVersion(key=service_key, version=service_tag) + ) + ) + log.info( + "image=%s, tag=%s, labels=%s", service_key, service_tag, simcore_service_labels + ) + + # paths_mapping express how to map dynamic-sidecar paths to the compose-spec volumes + # where the service expects to find its certain folders + + labels_for_involved_services: Dict[ + str, SimcoreServiceLabels + ] = await _extract_osparc_involved_service_labels( + director_v0_client=director_v0_client, + service_key=service_key, + service_tag=service_tag, + service_labels=simcore_service_labels, + ) + logging.info("labels_for_involved_services=%s", labels_for_involved_services) + + # merge the settings from the all the involved services + settings: Deque[SimcoreServiceSettingLabelEntry] = deque() # TODO: fix typing here + for compose_spec_key, service_labels in labels_for_involved_services.items(): + service_settings: SimcoreServiceSettingsLabel = cast( + SimcoreServiceSettingsLabel, service_labels.settings + ) + + settings.extend( + # inject compose spec key, used to target container specific services + _add_compose_destination_container_to_settings_entries( + settings=service_settings, destination_container=compose_spec_key + ) + ) + + settings = _merge_resources_in_settings(settings) + settings = _patch_target_service_into_env_vars(settings) + + return SimcoreServiceSettingsLabel.parse_obj(settings) + + +__all__ = ["merge_settings_before_use", "inject_settings_to_create_service_params"] diff --git 
a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_dynamic_sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_dynamic_sidecar.py index 6724c82d85d..a6f794659c0 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_dynamic_sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_dynamic_sidecar.py @@ -1,35 +1,16 @@ import json import logging -from collections import deque -from typing import Any, Deque, Dict, List, cast +from pathlib import Path +from typing import Any, Dict -from models_library.service_settings_labels import ( - ComposeSpecLabel, - SimcoreServiceLabels, - SimcoreServiceSettingLabelEntry, - SimcoreServiceSettingsLabel, -) -from models_library.services import ServiceKeyVersion +from models_library.service_settings_labels import SimcoreServiceSettingsLabel -from ....api.dependencies.director_v0 import DirectorV0Client -from ....core.settings import DynamicSidecarSettings +from ....core.settings import AppSettings, DynamicSidecarSettings from ....models.schemas.constants import DYNAMIC_SIDECAR_SERVICE_PREFIX from ....models.schemas.dynamic_services import SchedulerData, ServiceType from ....utils.registry import get_dynamic_sidecar_env_vars -from ..errors import DynamicSidecarError - -# Notes on below env var names: -# - SIMCORE_REGISTRY will be replaced by the url of the simcore docker registry -# deployed inside the platform -# - SERVICE_VERSION will be replaced by the version of the service -# to which this compos spec is attached -# Example usage in docker compose: -# image: ${SIMCORE_REGISTRY}/${DOCKER_IMAGE_NAME}-dynamic-sidecar-compose-spec:${SERVICE_VERSION} -MATCH_SERVICE_VERSION = "${SERVICE_VERSION}" -MATCH_SIMCORE_REGISTRY = "${SIMCORE_REGISTRY}" -MATCH_IMAGE_START = f"{MATCH_SIMCORE_REGISTRY}/" 
-MATCH_IMAGE_END = f":{MATCH_SERVICE_VERSION}" - +from ..volumes_resolver import DynamicSidecarVolumesPathsResolver +from .settings import inject_settings_to_create_service_params log = logging.getLogger(__name__) @@ -40,358 +21,26 @@ def extract_service_port_from_compose_start_spec( return create_service_params["labels"]["service_port"] -def _parse_mount_settings(settings: List[Dict]) -> List[Dict]: - mounts = [] - for s in settings: - log.debug("Retrieved mount settings %s", s) - mount = {} - mount["ReadOnly"] = True - if "ReadOnly" in s and s["ReadOnly"] in ["false", "False", False]: - mount["ReadOnly"] = False - - for field in ["Source", "Target", "Type"]: - if field in s: - mount[field] = s[field] - else: - log.warning( - "Mount settings have wrong format. Required keys [Source, Target, Type]" - ) - continue - - log.debug("Append mount settings %s", mount) - mounts.append(mount) - - return mounts - - -def _parse_env_settings(settings: List[str]) -> Dict: - envs = {} - for s in settings: - log.debug("Retrieved env settings %s", s) - if "=" in s: - parts = s.split("=") - if len(parts) == 2: - # will be forwarded to dynamic-sidecar spawned containers - envs[f"FORWARD_ENV_{parts[0]}"] = parts[1] - - log.debug("Parsed env settings %s", s) - - return envs - - -# pylint: disable=too-many-branches -def _inject_settings_to_create_service_params( - labels_service_settings: SimcoreServiceSettingsLabel, - create_service_params: Dict[str, Any], -) -> None: - for param in labels_service_settings: - param: SimcoreServiceSettingLabelEntry = param - # NOTE: the below capitalize addresses a bug in a lot of already in use services - # where Resources was written in lower case - if param.setting_type.capitalize() == "Resources": - # python-API compatible for backward compatibility - if "mem_limit" in param.value: - create_service_params["task_template"]["Resources"]["Limits"][ - "MemoryBytes" - ] = param.value["mem_limit"] - if "cpu_limit" in param.value: - 
create_service_params["task_template"]["Resources"]["Limits"][ - "NanoCPUs" - ] = param.value["cpu_limit"] - if "mem_reservation" in param.value: - create_service_params["task_template"]["Resources"]["Reservations"][ - "MemoryBytes" - ] = param.value["mem_reservation"] - if "cpu_reservation" in param.value: - create_service_params["task_template"]["Resources"]["Reservations"][ - "NanoCPUs" - ] = param.value["cpu_reservation"] - # REST-API compatible - if "Limits" in param.value or "Reservations" in param.value: - create_service_params["task_template"]["Resources"].update(param.value) - - # publishing port on the ingress network. - elif param.name == "ports" and param.setting_type == "int": # backward comp - create_service_params["labels"]["port"] = create_service_params["labels"][ - "service_port" - ] = str(param.value) - # REST-API compatible - elif param.setting_type == "EndpointSpec": - if "Ports" in param.value: - if ( - isinstance(param.value["Ports"], list) - and "TargetPort" in param.value["Ports"][0] - ): - create_service_params["labels"]["port"] = create_service_params[ - "labels" - ]["service_port"] = str(param.value["Ports"][0]["TargetPort"]) - - # placement constraints - elif param.name == "constraints": # python-API compatible - create_service_params["task_template"]["Placement"][ - "Constraints" - ] += param.value - elif param.setting_type == "Constraints": # REST-API compatible - create_service_params["task_template"]["Placement"][ - "Constraints" - ] += param.value - elif param.name == "env": - log.debug("Found env parameter %s", param.value) - env_settings = _parse_env_settings(param.value) - if env_settings: - create_service_params["task_template"]["ContainerSpec"]["Env"].update( - env_settings - ) - elif param.name == "mount": - log.debug("Found mount parameter %s", param.value) - mount_settings: List[Dict] = _parse_mount_settings(param.value) - if mount_settings: - create_service_params["task_template"]["ContainerSpec"][ - "Mounts" - 
].extend(mount_settings) - - container_spec = create_service_params["task_template"]["ContainerSpec"] - # set labels for CPU and Memory limits - container_spec["Labels"]["nano_cpus_limit"] = str( - create_service_params["task_template"]["Resources"]["Limits"]["NanoCPUs"] - ) - container_spec["Labels"]["mem_limit"] = str( - create_service_params["task_template"]["Resources"]["Limits"]["MemoryBytes"] - ) - - -def _assemble_key(service_key: str, service_tag: str) -> str: - return f"{service_key}:{service_tag}" - - -async def _extract_osparc_involved_service_labels( - director_v0_client: DirectorV0Client, - service_key: str, - service_tag: str, - service_labels: SimcoreServiceLabels, -) -> Dict[str, SimcoreServiceLabels]: - """ - Returns all the involved oSPARC services from the provided service labels. - - If the service contains a compose-spec that will also be parsed for images. - Searches for images like the following in the spec: - - `${REGISTRY_URL}/**SOME_SERVICE_NAME**:${SERVICE_TAG}` - - `${REGISTRY_URL}/**SOME_SERVICE_NAME**:1.2.3` where `1.2.3` is a hardcoded tag - """ - - # initialize with existing labels - # stores labels mapped by image_name service:tag - docker_image_name_by_services: Dict[str, SimcoreServiceLabels] = { - _assemble_key(service_key=service_key, service_tag=service_tag): service_labels +def _get_dy_sidecar_env_vars( + scheduler_data: SchedulerData, app_settings: AppSettings +) -> Dict[str, str]: + return { + "DY_SIDECAR_PATH_INPUTS": f"{scheduler_data.paths_mapping.inputs_path}", + "DY_SIDECAR_PATH_OUTPUTS": f"{scheduler_data.paths_mapping.outputs_path}", + "DY_SIDECAR_STATE_PATHS": json.dumps( + [f"{x}" for x in scheduler_data.paths_mapping.state_paths] + ), + "DY_SIDECAR_USER_ID": f"{scheduler_data.user_id}", + "DY_SIDECAR_PROJECT_ID": f"{scheduler_data.project_id}", + "DY_SIDECAR_NODE_ID": f"{scheduler_data.node_uuid}", + "POSTGRES_HOST": f"{app_settings.POSTGRES.POSTGRES_HOST}", + "POSTGRES_ENDPOINT": 
f"{app_settings.POSTGRES.POSTGRES_HOST}:{app_settings.POSTGRES.POSTGRES_PORT}", + "POSTGRES_PASSWORD": f"{app_settings.POSTGRES.POSTGRES_PASSWORD.get_secret_value()}", + "POSTGRES_PORT": f"{app_settings.POSTGRES.POSTGRES_PORT}", + "POSTGRES_USER": f"{app_settings.POSTGRES.POSTGRES_USER}", + "POSTGRES_DB": f"{app_settings.POSTGRES.POSTGRES_DB}", + "STORAGE_ENDPOINT": app_settings.STORAGE_ENDPOINT, } - compose_spec: ComposeSpecLabel = cast(ComposeSpecLabel, service_labels.compose_spec) - if compose_spec is None: - return docker_image_name_by_services - - # maps form image_name to compose_spec key - reverse_mapping: Dict[str, str] = {} - - compose_spec_services = compose_spec.get("services", {}) - image = None - for compose_service_key, service_data in compose_spec_services.items(): - image = service_data.get("image", None) - if image is None: - continue - - # if image dose not have this format skip: - # - `${SIMCORE_REGISTRY}/**SOME_SERVICE_NAME**:${SERVICE_VERSION}` - # - `${SIMCORE_REGISTRY}/**SOME_SERVICE_NAME**:1.2.3` a hardcoded tag - if not image.startswith(MATCH_IMAGE_START) or ":" not in image: - continue - if not image.startswith(MATCH_IMAGE_START) or not image.endswith( - MATCH_IMAGE_END - ): - continue - - # strips `${REGISTRY_URL}/`; replaces `${SERVICE_TAG}` with `service_tag` - osparc_image_key = image.replace(MATCH_SERVICE_VERSION, service_tag).replace( - MATCH_IMAGE_START, "" - ) - current_service_key, current_service_tag = osparc_image_key.split(":") - involved_key = _assemble_key( - service_key=current_service_key, service_tag=current_service_tag - ) - reverse_mapping[involved_key] = compose_service_key - - simcore_service_labels: SimcoreServiceLabels = ( - await director_v0_client.get_service_labels( - service=ServiceKeyVersion( - key=current_service_key, version=current_service_tag - ) - ) - ) - docker_image_name_by_services[involved_key] = simcore_service_labels - - if len(reverse_mapping) != len(docker_image_name_by_services): - message = ( - 
f"Extracting labels for services in '{image}' could not fill " - f"reverse_mapping={reverse_mapping}; " - f"docker_image_name_by_services={docker_image_name_by_services}" - ) - log.error(message) - raise DynamicSidecarError(message) - - # remaps from image_name as key to compose_spec key - compose_spec_mapped_labels = { - reverse_mapping[k]: v for k, v in docker_image_name_by_services.items() - } - return compose_spec_mapped_labels - - -def _add_compose_destination_container_to_settings_entries( - settings: SimcoreServiceSettingsLabel, destination_container: str -) -> List[SimcoreServiceSettingLabelEntry]: - def _inject_destination_container( - item: SimcoreServiceSettingLabelEntry, - ) -> SimcoreServiceSettingLabelEntry: - # pylint: disable=protected-access - item._destination_container = destination_container - return item - - return [_inject_destination_container(x) for x in settings] - - -def _merge_resources_in_settings( - settings: Deque[SimcoreServiceSettingLabelEntry], -) -> Deque[SimcoreServiceSettingLabelEntry]: - """All oSPARC services which have defined resource requirements will be added""" - result: Deque[SimcoreServiceSettingLabelEntry] = deque() - resources_entries: Deque[SimcoreServiceSettingLabelEntry] = deque() - - log.debug("merging settings %s", settings) - - for entry in settings: - entry: SimcoreServiceSettingLabelEntry = entry - if entry.name == "Resources" and entry.setting_type == "Resources": - resources_entries.append(entry) - else: - result.append(entry) - - if len(resources_entries) <= 1: - return settings - - # merge all resources - empty_resource_entry: SimcoreServiceSettingLabelEntry = ( - SimcoreServiceSettingLabelEntry( - name="Resources", - setting_type="Resources", - value={ - "Limits": {"NanoCPUs": 0, "MemoryBytes": 0}, - "Reservations": { - "NanoCPUs": 0, - "MemoryBytes": 0, - "GenericResources": [], - }, - }, - ) - ) - - for resource_entry in resources_entries: - resource_entry: SimcoreServiceSettingLabelEntry = 
resource_entry - limits = resource_entry.value.get("Limits", {}) - empty_resource_entry.value["Limits"]["NanoCPUs"] += limits.get("NanoCPUs", 0) - empty_resource_entry.value["Limits"]["MemoryBytes"] += limits.get( - "MemoryBytes", 0 - ) - - reservations = resource_entry.value.get("Reservations", {}) - empty_resource_entry.value["Reservations"]["NanoCPUs"] = reservations.get( - "NanoCPUs", 0 - ) - empty_resource_entry.value["Reservations"]["MemoryBytes"] = reservations.get( - "MemoryBytes", 0 - ) - empty_resource_entry.value["Reservations"]["GenericResources"] = [] - # put all generic resources together without looking for duplicates - empty_resource_entry.value["Reservations"]["GenericResources"].extend( - reservations.get("GenericResources", []) - ) - - result.append(empty_resource_entry) - - return result - - -def _patch_target_service_into_env_vars( - settings: Deque[SimcoreServiceSettingLabelEntry], -) -> Deque[SimcoreServiceSettingLabelEntry]: - """NOTE: this method will modify settings in place""" - - def _format_env_var(env_var: str, destination_container: str) -> str: - var_name, var_payload = env_var.split("=") - json_encoded = json.dumps( - dict(destination_container=destination_container, env_var=var_payload) - ) - return f"{var_name}={json_encoded}" - - for entry in settings: - entry: SimcoreServiceSettingLabelEntry = entry - if entry.name == "env" and entry.setting_type == "string": - # process entry - list_of_env_vars = entry.value if entry.value else [] - - # pylint: disable=protected-access - destination_container = entry._destination_container - - # transforms settings defined environment variables - # from `ENV_VAR=PAYLOAD` - # to `ENV_VAR={"destination_container": "destination_container", "env_var": "PAYLOAD"}` - entry.value = [ - _format_env_var(x, destination_container) for x in list_of_env_vars - ] - - return settings - - -async def merge_settings_before_use( - director_v0_client: DirectorV0Client, service_key: str, service_tag: str -) -> 
SimcoreServiceSettingsLabel: - - simcore_service_labels: SimcoreServiceLabels = ( - await director_v0_client.get_service_labels( - service=ServiceKeyVersion(key=service_key, version=service_tag) - ) - ) - log.info( - "image=%s, tag=%s, labels=%s", service_key, service_tag, simcore_service_labels - ) - - # paths_mapping express how to map dynamic-sidecar paths to the compose-spec volumes - # where the service expects to find its certain folders - - labels_for_involved_services: Dict[ - str, SimcoreServiceLabels - ] = await _extract_osparc_involved_service_labels( - director_v0_client=director_v0_client, - service_key=service_key, - service_tag=service_tag, - service_labels=simcore_service_labels, - ) - logging.info("labels_for_involved_services=%s", labels_for_involved_services) - - # merge the settings from the all the involved services - settings: Deque[SimcoreServiceSettingLabelEntry] = deque() # TODO: fix typing here - for compose_spec_key, service_labels in labels_for_involved_services.items(): - service_settings: SimcoreServiceSettingsLabel = cast( - SimcoreServiceSettingsLabel, service_labels.settings - ) - - settings.extend( - # inject compose spec key, used to target container specific services - _add_compose_destination_container_to_settings_entries( - settings=service_settings, destination_container=compose_spec_key - ) - ) - - settings = _merge_resources_in_settings(settings) - settings = _patch_target_service_into_env_vars(settings) - - return SimcoreServiceSettingsLabel.parse_obj(settings) async def get_dynamic_sidecar_spec( @@ -400,12 +49,18 @@ async def get_dynamic_sidecar_spec( dynamic_sidecar_network_id: str, swarm_network_id: str, settings: SimcoreServiceSettingsLabel, + app_settings: AppSettings, ) -> Dict[str, Any]: """ The dynamic-sidecar is responsible for managing the lifecycle of the dynamic service. The director-v2 directly coordinates with the dynamic-sidecar for this purpose. 
""" + # To avoid collisions for started docker resources a unique identifier is computed: + # - avoids container level collisions on same node + # - avoids volume level collisions on same node + compose_namespace = f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}_{scheduler_data.node_uuid}" + mounts = [ # docker socket needed to use the docker api { @@ -415,6 +70,28 @@ async def get_dynamic_sidecar_spec( } ] + # Docker does not allow mounting of subfolders from volumes as the following: + # `volume_name/inputs:/target_folder/inputs` + # `volume_name/outputs:/target_folder/inputs` + # `volume_name/path/to/sate/01:/target_folder/path_to_sate_01` + # + # Two separate volumes are required to achieve the following on the spawned + # dynamic-sidecar containers: + # `volume_name_inputs:/target_folder/inputs` + # `volume_name_outputs:/target_folder/outputs` + # `volume_name_path_to_sate_01:/target_folder/path_to_sate_01` + for path_to_mount in [ + Path("/inputs"), + Path("/outputs"), + ] + scheduler_data.paths_mapping.state_paths: + mounts.append( + DynamicSidecarVolumesPathsResolver.mount_entry( + compose_namespace=compose_namespace, + state_path=path_to_mount, + node_uuid=f"{scheduler_data.node_uuid}", + ) + ) + endpint_spec = {} if dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV is not None: @@ -456,9 +133,6 @@ async def get_dynamic_sidecar_spec( } ] - # used for the container name to avoid collisions for started containers on the same node - compose_namespace = f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}_{scheduler_data.node_uuid}" - create_service_params = { "endpoint_spec": endpint_spec, "labels": { @@ -491,6 +165,7 @@ async def get_dynamic_sidecar_spec( "SIMCORE_HOST_NAME": scheduler_data.service_name, "DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace, **get_dynamic_sidecar_env_vars(dynamic_sidecar_settings.REGISTRY), + **_get_dy_sidecar_env_vars(scheduler_data, app_settings), }, "Hosts": [], "Image": dynamic_sidecar_settings.DYNAMIC_SIDECAR_IMAGE, @@ -515,7 +190,7 @@ async 
def get_dynamic_sidecar_spec( }, } - _inject_settings_to_create_service_params( + inject_settings_to_create_service_params( labels_service_settings=settings, create_service_params=create_service_params, ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_proxy.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_proxy.py index 5a3dc8a058b..518ba98f11d 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_proxy.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/spec_proxy.py @@ -39,7 +39,13 @@ async def get_dynamic_proxy_spec( dynamic_sidecar_settings.DYNAMIC_SIDECAR_PROXY_SETTINGS ) + # expose this service on an empty port + endpint_spec = {} + if dynamic_sidecar_settings.PROXY_EXPOSE_PORT: + endpint_spec["Ports"] = [{"Protocol": "tcp", "TargetPort": 80}] + return { + "endpoint_spec": endpint_spec, "labels": { # TODO: let's use a pydantic model with descriptions "io.simcore.zone": f"{dynamic_sidecar_settings.TRAEFIK_SIMCORE_ZONE}", diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py index 1479ab27a64..e28e793c2f4 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py @@ -5,10 +5,12 @@ import httpx from fastapi import FastAPI +from models_library.service_settings_labels import SimcoreServiceSettingsLabel +from servicelib.utils import logged_gather from tenacity._asyncio import AsyncRetrying from tenacity.before_sleep import before_sleep_log from tenacity.stop import stop_after_delay -from tenacity.wait import wait_fixed 
+from tenacity.wait import wait_exponential, wait_fixed from ....core.settings import DynamicSidecarSettings from ....models.schemas.dynamic_services import ( @@ -17,13 +19,16 @@ SchedulerData, ) from ....modules.director_v0 import DirectorV0Client -from ..client_api import get_dynamic_sidecar_client +from ..client_api import DynamicSidecarClient, get_dynamic_sidecar_client from ..docker_api import ( create_network, create_service_and_get_id, get_node_id_from_task_for_service, get_swarm_network, is_dynamic_sidecar_missing, + remove_dynamic_sidecar_network, + remove_dynamic_sidecar_stack, + remove_dynamic_sidecar_volumes, ) from ..docker_compose_specs import assemble_spec from ..docker_service_specs import ( @@ -32,7 +37,11 @@ get_dynamic_sidecar_spec, merge_settings_before_use, ) -from ..errors import DynamicSidecarNetworkError, EntrypointContainerNotFoundError +from ..errors import ( + DynamicSidecarNetworkError, + EntrypointContainerNotFoundError, + GenericDockerError, +) from .abc import DynamicSchedulerEvent logger = logging.getLogger(__name__) @@ -82,7 +91,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: # the provided docker-compose spec # also other encodes the env vars to target the proper container director_v0_client: DirectorV0Client = _get_director_v0_client(app) - settings = await merge_settings_before_use( + settings: SimcoreServiceSettingsLabel = await merge_settings_before_use( director_v0_client=director_v0_client, service_key=scheduler_data.key, service_tag=scheduler_data.version, @@ -116,6 +125,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: dynamic_sidecar_network_id=dynamic_sidecar_network_id, swarm_network_id=swarm_network_id, settings=settings, + app_settings=app.state.settings, ) logger.debug( "dynamic-sidecar create_service_params %s", @@ -184,11 +194,13 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: ) -class 
CreateUserServices(DynamicSchedulerEvent): +class PrepareServicesEnvironment(DynamicSchedulerEvent): """ Triggered when the dynamic-sidecar is responding to http requests. - The docker compose spec for the service is assembled. - The dynamic-sidecar is asked to start a service for that service spec. + This step runs before CreateUserServices. + + Sets up the environment on the host required by the service. + - restores service state """ @classmethod @@ -196,6 +208,32 @@ async def will_trigger(cls, app: FastAPI, scheduler_data: SchedulerData) -> bool return ( scheduler_data.dynamic_sidecar.status.current == DynamicSidecarStatus.OK and scheduler_data.dynamic_sidecar.is_available == True + and scheduler_data.dynamic_sidecar.service_environment_prepared == False + ) + + @classmethod + async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: + dynamic_sidecar_client = get_dynamic_sidecar_client(app) + dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint + + logger.info("Calling into dynamic-sidecar to restore state") + await dynamic_sidecar_client.service_restore_state(dynamic_sidecar_endpoint) + logger.info("State restored by dynamic-sidecar") + + scheduler_data.dynamic_sidecar.service_environment_prepared = True + + +class CreateUserServices(DynamicSchedulerEvent): + """ + Triggered when the the environment was prepared. + The docker compose spec for the service is assembled. + The dynamic-sidecar is asked to start a service for that service spec. 
+ """ + + @classmethod + async def will_trigger(cls, app: FastAPI, scheduler_data: SchedulerData) -> bool: + return ( + scheduler_data.dynamic_sidecar.service_environment_prepared and scheduler_data.dynamic_sidecar.compose_spec_submitted == False ) @@ -213,6 +251,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: app=app, service_key=scheduler_data.key, service_tag=scheduler_data.version, + paths_mapping=scheduler_data.paths_mapping, compose_spec=scheduler_data.compose_spec, container_http_entry=scheduler_data.container_http_entry, dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name, @@ -254,9 +293,15 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: retry_error_cls=EntrypointContainerNotFoundError, before_sleep=before_sleep_log(logger, logging.WARNING), ): - # TODO: refactor, this needs to stop waiting when the service is marked for removal - # after merging of https://github.com/ITISFoundation/osparc-simcore/pull/2509 with attempt: + if scheduler_data.dynamic_sidecar.service_removal_state.was_removed: + # the service was removed while waiting for the operation to finish + logger.warning( + "Stopping `get_entrypoint_container_name` operation. " + "Will no try to start the service." + ) + return + entrypoint_container = await dynamic_sidecar_client.get_entrypoint_container_name( dynamic_sidecar_endpoint=dynamic_sidecar_endpoint, dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name, @@ -291,10 +336,110 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: scheduler_data.dynamic_sidecar.was_compose_spec_submitted = True +class RemoveUserCreatedServices(DynamicSchedulerEvent): + """ + Triggered when the service is marked for removal. + + The state of the service will be stored. If dynamic-sidecar + is not reachable a warning is logged. + The outputs of the service wil be pushed. If dynamic-sidecar + is not reachable a warning is logged. 
+ The dynamic-sidcar together with spawned containers + and dedicated network will be removed. + The scheduler will no longer track the service. + """ + + @classmethod + async def will_trigger(cls, app: FastAPI, scheduler_data: SchedulerData) -> bool: + return scheduler_data.dynamic_sidecar.service_removal_state.can_remove + + @classmethod + async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: + # invoke container cleanup at this point + dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client(app) + + try: + await dynamic_sidecar_client.begin_service_destruction( + dynamic_sidecar_endpoint=scheduler_data.dynamic_sidecar.endpoint + ) + except Exception as e: # pylint: disable=broad-except + logger.warning( + "Could not contact dynamic-sidecar to begin destruction of %s\n%s", + scheduler_data.service_name, + str(e), + ) + + dynamic_sidecar_settings: DynamicSidecarSettings = ( + app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR + ) + + if scheduler_data.dynamic_sidecar.service_removal_state.can_save: + dynamic_sidecar_client = get_dynamic_sidecar_client(app) + dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint + + logger.info( + "Calling into dynamic-sidecar to save state and pushing data to nodeports" + ) + try: + await logged_gather( + dynamic_sidecar_client.service_push_output_ports( + dynamic_sidecar_endpoint, + ), + dynamic_sidecar_client.service_save_state( + dynamic_sidecar_endpoint, + ), + ) + logger.info("State saved by dynamic-sidecar") + except Exception as e: # pylint: disable=broad-except + logger.warning( + ( + "Could not contact dynamic-sidecar to save service " + "state and upload outputs %s\n%s" + ), + scheduler_data.service_name, + str(e), + ) + + # remove the 2 services + await remove_dynamic_sidecar_stack( + node_uuid=scheduler_data.node_uuid, + dynamic_sidecar_settings=dynamic_sidecar_settings, + ) + # remove network + await remove_dynamic_sidecar_network( + 
scheduler_data.dynamic_sidecar_network_name + ) + + # remove created inputs and outputs volumes + async for attempt in AsyncRetrying( + wait=wait_exponential(min=1), + stop=stop_after_delay(20), + retry_error_cls=GenericDockerError, + ): + with attempt: + logger.info( + "Trying to remove volumes for %s", scheduler_data.service_name + ) + await remove_dynamic_sidecar_volumes(scheduler_data.node_uuid) + + logger.debug( + "Removed dynamic-sidecar created services for '%s'", + scheduler_data.service_name, + ) + + await app.state.dynamic_sidecar_scheduler.finish_service_removal( + scheduler_data.node_uuid + ) + + scheduler_data.dynamic_sidecar.service_removal_state.mark_removed() + + # register all handlers defined in this module here # A list is essential to guarantee execution order REGISTERED_EVENTS: List[Type[DynamicSchedulerEvent]] = [ CreateSidecars, GetStatus, + PrepareServicesEnvironment, CreateUserServices, + RemoveUserCreatedServices, ] diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/task.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/task.py index 928e081bcc4..6a2d66093b2 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/task.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/task.py @@ -1,9 +1,10 @@ import asyncio import logging import traceback -from asyncio import Lock, Task, sleep +from asyncio import Lock, Queue, Task, sleep from copy import deepcopy -from typing import Deque, Dict, Optional +from dataclasses import dataclass, field +from typing import Deque, Dict, List, Optional from uuid import UUID import httpx @@ -16,6 +17,7 @@ DynamicServicesSettings, DynamicSidecarSettings, ) +from ....models.domains.dynamic_services import RetrieveDataOutEnveloped from ....models.schemas.dynamic_services import ( AsyncResourceLock, DynamicSidecarStatus, @@ -33,8 +35,6 @@ 
are_all_services_present, get_dynamic_sidecar_state, get_dynamic_sidecars_to_observe, - remove_dynamic_sidecar_network, - remove_dynamic_sidecar_stack, ) from ..docker_states import ServiceState, extract_containers_minimim_statuses from ..errors import DynamicSidecarError, DynamicSidecarNotFoundError @@ -65,22 +65,10 @@ async def _apply_observation_cycle( logger.warning( "Removing service %s from observation", scheduler_data.service_name ) - await scheduler.remove_service( + await scheduler.mark_service_for_removal( node_uuid=scheduler_data.node_uuid, - save_state=scheduler_data.dynamic_sidecar.can_save_state, + can_save=scheduler_data.dynamic_sidecar.can_save_state, ) - return # pragma: no cover - - # if the service is not OK (for now failing) observation cycle will - # be skipped. This will allow for others to debug it - if scheduler_data.dynamic_sidecar.status.current != DynamicSidecarStatus.OK: - message = ( - f"Service {scheduler_data.service_name} is failing. Skipping observation.\n" - f"Scheduler data\n{scheduler_data}" - ) - # logging as error as this must be addressed by someone - logger.error(message) - return try: with timeout( @@ -106,15 +94,17 @@ async def _apply_observation_cycle( ) +@dataclass class DynamicSidecarsScheduler: - def __init__(self, app: FastAPI): - self._app: FastAPI = app - self._lock: Lock = Lock() + app: FastAPI - self._to_observe: Dict[str, LockWithSchedulerData] = {} - self._keep_running: bool = False - self._inverse_search_mapping: Dict[UUID, str] = {} - self._scheduler_task: Optional[Task] = None + _lock: Lock = field(default_factory=Lock) + _to_observe: Dict[str, LockWithSchedulerData] = field(default_factory=dict) + _keep_running: bool = False + _inverse_search_mapping: Dict[UUID, str] = field(default_factory=dict) + _scheduler_task: Optional[Task] = None + _trigger_observation_queue_task: Optional[Task] = None + _trigger_observation_queue: Queue = field(default_factory=Queue) async def add_service(self, scheduler_data: 
SchedulerData) -> None: """Invoked before the service is started @@ -145,12 +135,16 @@ async def add_service(self, scheduler_data: SchedulerData) -> None: resource_lock=AsyncResourceLock.from_is_locked(False), scheduler_data=scheduler_data, ) + + await self._enqueue_observation_from_service_name( + scheduler_data.service_name + ) logger.debug("Added service '%s' to observe", scheduler_data.service_name) - async def remove_service( - self, node_uuid: NodeID, save_state: Optional[bool] + async def mark_service_for_removal( + self, node_uuid: NodeID, can_save: Optional[bool] ) -> None: - """Handles the removal cycle of the services, saving states etc...""" + """Marks service for removal, causing RemoveMarkedService to trigger""" async with self._lock: if node_uuid not in self._inverse_search_mapping: raise DynamicSidecarNotFoundError(node_uuid) @@ -159,41 +153,31 @@ async def remove_service( if service_name not in self._to_observe: return - # invoke container cleanup at this point - dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client( - self._app - ) - current: LockWithSchedulerData = self._to_observe[service_name] - dynamic_sidecar_endpoint = current.scheduler_data.dynamic_sidecar.endpoint - try: - await dynamic_sidecar_client.begin_service_destruction( - dynamic_sidecar_endpoint=dynamic_sidecar_endpoint - ) - except httpx.HTTPError: - logger.warning("Could not begin destruction of %s", service_name) - - dynamic_sidecar_settings: DynamicSidecarSettings = ( - self._app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR + current.scheduler_data.dynamic_sidecar.service_removal_state.mark_to_remove( + can_save ) - _ = save_state - # TODO: save state and others go here + await self._enqueue_observation_from_service_name(service_name) + logger.debug("Service '%s' marked for removal from scheduler", service_name) - # remove the 2 services - await remove_dynamic_sidecar_stack( - node_uuid=current.scheduler_data.node_uuid, - 
dynamic_sidecar_settings=dynamic_sidecar_settings, - ) - # remove network - await remove_dynamic_sidecar_network( - current.scheduler_data.dynamic_sidecar_network_name - ) + async def finish_service_removal(self, node_uuid: NodeID) -> None: + """ + directly invoked from RemoveMarkedService once it's finished + removes the service from the observation cycle + """ + async with self._lock: + if node_uuid not in self._inverse_search_mapping: + raise DynamicSidecarNotFoundError(node_uuid) + + service_name = self._inverse_search_mapping[node_uuid] + if service_name not in self._to_observe: + return - # finally remove it from the scheduler del self._to_observe[service_name] del self._inverse_search_mapping[node_uuid] - logger.debug("Removed service '%s' from scheduler", service_name) + + logger.debug("Removed service '%s' from scheduler", service_name) async def get_stack_status(self, node_uuid: NodeID) -> RunningDynamicServiceDetails: if node_uuid not in self._inverse_search_mapping: @@ -228,7 +212,7 @@ async def get_stack_status(self, node_uuid: NodeID) -> RunningDynamicServiceDeta ) dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client( - self._app + self.app ) try: @@ -267,9 +251,32 @@ async def get_stack_status(self, node_uuid: NodeID) -> RunningDynamicServiceDeta service_message=container_message, ) - async def _runner(self) -> None: - """This code runs under a lock and can safely change the SchedulerData of all entries""" - logger.debug("Observing dynamic-sidecars") + async def retrieve_service_inputs( + self, node_uuid: NodeID, port_keys: List[str] + ) -> RetrieveDataOutEnveloped: + """Pulls data from input ports for the service""" + if node_uuid not in self._inverse_search_mapping: + raise DynamicSidecarNotFoundError(node_uuid) + + service_name = self._inverse_search_mapping[node_uuid] + scheduler_data: SchedulerData = self._to_observe[service_name].scheduler_data + + dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client( + 
self.app + ) + + transferred_bytes = await dynamic_sidecar_client.service_pull_input_ports( + dynamic_sidecar_endpoint=scheduler_data.dynamic_sidecar.endpoint, + port_keys=port_keys, + ) + + return RetrieveDataOutEnveloped.from_transferred_bytes(transferred_bytes) + + async def _enqueue_observation_from_service_name(self, service_name: str) -> None: + await self._trigger_observation_queue.put(service_name) + + async def _run_trigger_observation_queue_task(self) -> None: + """generates events at regular time interval""" async def observing_single_service(service_name: str) -> None: lock_with_scheduler_data: LockWithSchedulerData = self._to_observe[ @@ -277,7 +284,7 @@ async def observing_single_service(service_name: str) -> None: ] scheduler_data: SchedulerData = lock_with_scheduler_data.scheduler_data try: - await _apply_observation_cycle(self._app, self, scheduler_data) + await _apply_observation_cycle(self.app, self, scheduler_data) except asyncio.CancelledError: # pylint: disable=try-except-raise raise # pragma: no cover except Exception: # pylint: disable=broad-except @@ -292,9 +299,15 @@ async def observing_single_service(service_name: str) -> None: # when done, always unlock the resource await lock_with_scheduler_data.resource_lock.unlock_resource() - # start observation for services which are - # not currently undergoing a observation cycle - for service_name in self._to_observe: + service_name: Optional[str] + while service_name := await self._trigger_observation_queue.get(): + logger.info("Handling observation for %s", service_name) + if service_name not in self._to_observe: + logger.debug( + "Skipping observation, service no longer found %s", service_name + ) + continue + lock_with_scheduler_data = self._to_observe[service_name] resource_marked_as_locked = ( await lock_with_scheduler_data.resource_lock.mark_as_locked_if_unlocked() @@ -303,16 +316,21 @@ async def observing_single_service(service_name: str) -> None: # fire and forget about the task 
asyncio.create_task(observing_single_service(service_name)) + logger.info("Scheduler 'trigger observation queue task' was shut down") + async def _run_scheduler_task(self) -> None: settings: DynamicServicesSchedulerSettings = ( - self._app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER + self.app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER ) while self._keep_running: - # make sure access to the dict is locked while the observation cycle is running + logger.debug("Observing dynamic-sidecars %s", self._to_observe.keys()) + try: + # prevent access to self._to_observe async with self._lock: - await self._runner() + for service_name in self._to_observe: + await self._enqueue_observation_from_service_name(service_name) await sleep(settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS) except asyncio.CancelledError: # pragma: no cover @@ -320,21 +338,16 @@ async def _run_scheduler_task(self) -> None: logger.warning("Scheduler was shut down") - async def start(self) -> None: - # run as a background task - logging.info("Starting dynamic-sidecar scheduler") - self._keep_running = True - self._scheduler_task = asyncio.create_task(self._run_scheduler_task()) - - # discover all services which were started before and add them to the scheduler + async def _discover_running_services(self) -> None: + """discover all services which were started before and add them to the scheduler""" dynamic_sidecar_settings: DynamicSidecarSettings = ( - self._app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR + self.app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR ) services_to_observe: Deque[ ServiceLabelsStoredData ] = await get_dynamic_sidecars_to_observe(dynamic_sidecar_settings) - logging.info( + logger.info( "The following services need to be observed: %s", services_to_observe ) @@ -345,8 +358,19 @@ async def start(self) -> None: ) await self.add_service(scheduler_data) + async def start(self) -> None: + # run as a background task + logger.info("Starting dynamic-sidecar 
scheduler") + self._keep_running = True + self._scheduler_task = asyncio.create_task(self._run_scheduler_task()) + self._trigger_observation_queue_task = asyncio.create_task( + self._run_trigger_observation_queue_task() + ) + + await self._discover_running_services() + async def shutdown(self): - logging.info("Shutting down dynamic-sidecar scheduler") + logger.info("Shutting down dynamic-sidecar scheduler") self._keep_running = False self._inverse_search_mapping = {} self._to_observe = {} @@ -355,6 +379,12 @@ async def shutdown(self): await self._scheduler_task self._scheduler_task = None + if self._trigger_observation_queue_task is not None: + await self._trigger_observation_queue.put(None) + await self._trigger_observation_queue_task + self._trigger_observation_queue_task = None + self._trigger_observation_queue = Queue() + async def setup_scheduler(app: FastAPI): dynamic_sidecars_scheduler = DynamicSidecarsScheduler(app) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py new file mode 100644 index 00000000000..acdb4adb814 --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py @@ -0,0 +1,30 @@ +import os +from pathlib import Path +from typing import Any, Dict + + +class DynamicSidecarVolumesPathsResolver: + BASE_PATH: str = "/dy-volumes" + + @classmethod + def _name_from_path(cls, path: Path) -> str: + return str(path).replace(os.sep, "_") + + @classmethod + def _target(cls, state_path: Path) -> str: + return f"{cls.BASE_PATH}/{cls._name_from_path(state_path).strip('_')}" + + @classmethod + def _source(cls, compose_namespace: str, state_path: Path) -> str: + return f"{compose_namespace}{cls._name_from_path(state_path)}" + + @classmethod + def mount_entry( + cls, compose_namespace: str, state_path: Path, node_uuid: str + ) -> Dict[str, Any]: + 
return { + "Source": cls._source(compose_namespace, state_path), + "Target": cls._target(state_path), + "Type": "volume", + "VolumeOptions": {"Labels": {"uuid": f"{node_uuid}"}}, + } diff --git a/services/director-v2/tests/conftest.py b/services/director-v2/tests/conftest.py index 783072ca3fd..e7d4d320710 100644 --- a/services/director-v2/tests/conftest.py +++ b/services/director-v2/tests/conftest.py @@ -259,6 +259,8 @@ def user_id() -> PositiveInt: @pytest.fixture(scope="module") def user_db(postgres_db: sa.engine.Engine, user_id: PositiveInt) -> Dict: with postgres_db.connect() as con: + # removes all users before continuing + con.execute(users.delete()) result = con.execute( users.insert() .values( @@ -276,7 +278,7 @@ def user_db(postgres_db: sa.engine.Engine, user_id: PositiveInt) -> Dict: yield dict(user) - con.execute(users.delete().where(users.c.id == user["id"])) + con.execute(users.delete().where(users.c.id == user_id)) @pytest.fixture diff --git a/services/director-v2/tests/integration/01/test_computation_api.py b/services/director-v2/tests/integration/01/test_computation_api.py index 41670c823f5..75b884af0ed 100644 --- a/services/director-v2/tests/integration/01/test_computation_api.py +++ b/services/director-v2/tests/integration/01/test_computation_api.py @@ -11,7 +11,6 @@ from copy import deepcopy from pathlib import Path from typing import Any, Callable, Dict, List -from uuid import UUID import pytest import sqlalchemy as sa @@ -24,15 +23,16 @@ from models_library.projects_state import RunningState from models_library.settings.rabbit import RabbitConfig from models_library.settings.redis import RedisConfig -from pydantic.networks import AnyHttpUrl from pydantic.types import PositiveInt -from requests.models import Response -from simcore_postgres_database.models.comp_tasks import comp_tasks -from simcore_postgres_database.models.projects import projects +from shared_comp_utils import ( + COMPUTATION_URL, + assert_computation_task_out_obj, + 
assert_pipeline_status, + create_pipeline, +) from simcore_service_director_v2.models.schemas.comp_tasks import ComputationTaskOut from starlette import status from starlette.testclient import TestClient -from tenacity import retry, retry_if_exception_type, stop_after_delay, wait_random from yarl import URL pytest_simcore_core_services_selection = [ @@ -46,93 +46,6 @@ ] pytest_simcore_ops_services_selection = ["minio", "adminer", "flower"] -COMPUTATION_URL: str = "v2/computations" - -# HELPERS --------------------------------------- - - -def _assert_pipeline_status( - client: TestClient, - url: AnyHttpUrl, - user_id: PositiveInt, - project_uuid: UUID, - wait_for_states: List[RunningState] = None, -) -> ComputationTaskOut: - if not wait_for_states: - wait_for_states = [ - RunningState.SUCCESS, - RunningState.FAILED, - RunningState.ABORTED, - ] - - MAX_TIMEOUT_S = 60 - - @retry( - stop=stop_after_delay(MAX_TIMEOUT_S), - wait=wait_random(0, 2), - retry=retry_if_exception_type(AssertionError), - reraise=True, - ) - def check_pipeline_state() -> ComputationTaskOut: - response = client.get(url, params={"user_id": user_id}) - assert ( - response.status_code == status.HTTP_202_ACCEPTED - ), f"response code is {response.status_code}, error: {response.text}" - task_out = ComputationTaskOut.parse_obj(response.json()) - assert task_out.id == project_uuid - assert task_out.url == f"{client.base_url}/v2/computations/{project_uuid}" - print("Pipeline is in ", task_out.state) - assert task_out.state in wait_for_states - return task_out - - task_out = check_pipeline_state() - - return task_out - - -def _create_pipeline( - client: TestClient, - *, - project: ProjectAtDB, - user_id: PositiveInt, - start_pipeline: bool, - expected_response_status_code: int, - **kwargs, -) -> Response: - response = client.post( - COMPUTATION_URL, - json={ - "user_id": user_id, - "project_id": str(project.uuid), - "start_pipeline": start_pipeline, - **kwargs, - }, - ) - assert ( - response.status_code 
== expected_response_status_code - ), f"response code is {response.status_code}, error: {response.text}" - return response - - -def _assert_computation_task_out_obj( - client: TestClient, - task_out: ComputationTaskOut, - *, - project: ProjectAtDB, - exp_task_state: RunningState, - exp_pipeline_details: PipelineDetails, -): - assert task_out.id == project.uuid - assert task_out.state == exp_task_state - assert task_out.url == f"{client.base_url}/v2/computations/{project.uuid}" - assert task_out.stop_url == ( - f"{client.base_url}/v2/computations/{project.uuid}:stop" - if exp_task_state in [RunningState.PUBLISHED, RunningState.PENDING] - else None - ) - # check pipeline details contents - assert task_out.pipeline_details == exp_pipeline_details - # FIXTURES --------------------------------------- @@ -174,34 +87,6 @@ def minimal_configuration( pass -@pytest.fixture -def update_project_workbench_with_comp_tasks(postgres_db: sa.engine.Engine) -> Callable: - def updator(project_uuid: str): - with postgres_db.connect() as con: - result = con.execute( - projects.select().where(projects.c.uuid == project_uuid) - ) - prj_row = result.first() - prj_workbench = prj_row.workbench - - result = con.execute( - comp_tasks.select().where(comp_tasks.c.project_id == project_uuid) - ) - # let's get the results and run_hash - for task_row in result: - # pass these to the project workbench - prj_workbench[task_row.node_id]["outputs"] = task_row.outputs - prj_workbench[task_row.node_id]["runHash"] = task_row.run_hash - - con.execute( - projects.update() - .values(workbench=prj_workbench) - .where(projects.c.uuid == project_uuid) - ) - - yield updator - - @pytest.fixture(scope="session") def fake_workbench_node_states_file(mocks_dir: Path) -> Path: file_path = mocks_dir / "fake_workbench_computational_node_states.json" @@ -288,7 +173,7 @@ def test_start_empty_computation( ): # send an empty project to process empty_project = project() - _create_pipeline( + create_pipeline( client, 
project=empty_project, user_id=user_id, @@ -449,7 +334,7 @@ def _convert_to_pipeline_details( ) # send a valid project with sleepers - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -463,7 +348,7 @@ def _convert_to_pipeline_details( ) task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -472,13 +357,13 @@ def _convert_to_pipeline_details( ) # now wait for the computation to finish - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, sleepers_project.uuid ) expected_pipeline_details_after_run = _convert_to_pipeline_details( sleepers_project, exp_pipeline_adj_list, exp_node_states_after_run ) - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -490,7 +375,7 @@ def _convert_to_pipeline_details( # FIXME: currently the webserver is the one updating the projects table so we need to fake this by copying the run_hash update_project_workbench_with_comp_tasks(str(sleepers_project.uuid)) - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -510,7 +395,7 @@ def _convert_to_pipeline_details( node_data.current_status = expected_pipeline_details.node_states[ node_id ].current_status - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -525,7 +410,7 @@ def _convert_to_pipeline_details( ) task_out = ComputationTaskOut.parse_obj(response.json()) - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -534,7 +419,7 @@ def _convert_to_pipeline_details( ) # now wait for the computation to finish - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( 
client, task_out.url, user_id, sleepers_project.uuid ) @@ -551,7 +436,7 @@ def test_run_computation( ): sleepers_project = project(workbench=fake_workbench_without_outputs) # send a valid project with sleepers - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -561,7 +446,7 @@ def test_run_computation( task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correct: a pipeline that just started gets PUBLISHED - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -570,7 +455,7 @@ def test_run_computation( ) # wait for the computation to start - _assert_pipeline_status( + assert_pipeline_status( client, task_out.url, user_id, @@ -579,11 +464,11 @@ def test_run_computation( ) # wait for the computation to finish (either by failing, success or abort) - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, sleepers_project.uuid ) - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -594,7 +479,7 @@ def test_run_computation( # FIXME: currently the webserver is the one updating the projects table so we need to fake this by copying the run_hash update_project_workbench_with_comp_tasks(str(sleepers_project.uuid)) # run again should return a 422 cause everything is uptodate - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -613,7 +498,7 @@ def test_run_computation( node_id ].current_status ) - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -623,7 +508,7 @@ def test_run_computation( ) task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correct - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, 
@@ -632,10 +517,10 @@ def test_run_computation( ) # wait for the computation to finish - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, sleepers_project.uuid ) - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -654,7 +539,7 @@ def test_abort_computation( ): sleepers_project = project(workbench=fake_workbench_without_outputs) # send a valid project with sleepers - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -664,7 +549,7 @@ def test_abort_computation( task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -673,7 +558,7 @@ def test_abort_computation( ) # wait until the pipeline is started - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, @@ -702,7 +587,7 @@ def test_abort_computation( assert task_out.stop_url == None # check that the pipeline is aborted/stopped - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, @@ -723,7 +608,7 @@ def test_update_and_delete_computation( ): sleepers_project = project(workbench=fake_workbench_without_outputs) # send a valid project with sleepers - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -733,7 +618,7 @@ def test_update_and_delete_computation( task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -742,7 +627,7 @@ def test_update_and_delete_computation( ) # update the pipeline - response = _create_pipeline( + response = create_pipeline( client, 
project=sleepers_project, user_id=user_id, @@ -752,7 +637,7 @@ def test_update_and_delete_computation( task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -761,7 +646,7 @@ def test_update_and_delete_computation( ) # update the pipeline - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -771,7 +656,7 @@ def test_update_and_delete_computation( task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -780,7 +665,7 @@ def test_update_and_delete_computation( ) # start it now - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -789,7 +674,7 @@ def test_update_and_delete_computation( ) task_out = ComputationTaskOut.parse_obj(response.json()) # check the contents is correctb - _assert_computation_task_out_obj( + assert_computation_task_out_obj( client, task_out, project=sleepers_project, @@ -798,7 +683,7 @@ def test_update_and_delete_computation( ) # wait until the pipeline is started - task_out = _assert_pipeline_status( + task_out = assert_pipeline_status( client, task_out.url, user_id, @@ -810,7 +695,7 @@ def test_update_and_delete_computation( ), f"pipeline is not in the expected starting state but in {task_out.state}" # now try to update the pipeline, is expected to be forbidden - response = _create_pipeline( + response = create_pipeline( client, project=sleepers_project, user_id=user_id, @@ -850,7 +735,7 @@ def test_pipeline_with_no_comp_services_still_create_correct_comp_tasks( ) # this pipeline is not runnable as there are no computational services - response = _create_pipeline( + response = create_pipeline( client, 
project=project_with_dynamic_node, user_id=user_id, @@ -859,7 +744,7 @@ def test_pipeline_with_no_comp_services_still_create_correct_comp_tasks( ) # still this pipeline shall be createable if we do not want to start it - response = _create_pipeline( + response = create_pipeline( client, project=project_with_dynamic_node, user_id=user_id, @@ -1025,7 +910,7 @@ async def test_burst_create_computations( sleepers_project = project(workbench=fake_workbench_without_outputs) sleepers_project2 = project(workbench=fake_workbench_without_outputs) - async def _create_pipeline(project: ProjectAtDB, start_pipeline: bool): + async def create_pipeline(project: ProjectAtDB, start_pipeline: bool): return await async_client.post( COMPUTATION_URL, json={ @@ -1043,10 +928,10 @@ async def _create_pipeline(project: ProjectAtDB, start_pipeline: bool): responses = await asyncio.gather( *( [ - _create_pipeline(sleepers_project, start_pipeline=False) + create_pipeline(sleepers_project, start_pipeline=False) for _ in range(NUMBER_OF_CALLS) ] - + [_create_pipeline(sleepers_project2, start_pipeline=False)] + + [create_pipeline(sleepers_project2, start_pipeline=False)] ) ) received_status_codes = [r.status_code for r in responses] @@ -1058,10 +943,10 @@ async def _create_pipeline(project: ProjectAtDB, start_pipeline: bool): responses = await asyncio.gather( *( [ - _create_pipeline(sleepers_project, start_pipeline=True) + create_pipeline(sleepers_project, start_pipeline=True) for _ in range(NUMBER_OF_CALLS) ] - + [_create_pipeline(sleepers_project2, start_pipeline=False)] + + [create_pipeline(sleepers_project2, start_pipeline=False)] ) ) received_status_codes = [r.status_code for r in responses] diff --git a/services/director-v2/tests/integration/02/conftest.py b/services/director-v2/tests/integration/02/conftest.py index ae9af023fbb..f56021dd83d 100644 --- a/services/director-v2/tests/integration/02/conftest.py +++ b/services/director-v2/tests/integration/02/conftest.py @@ -7,7 +7,7 @@ 
@pytest.fixture def network_name() -> str: - return "test_swarm_network_name" + return "pytest-simcore_interactive_services_subnet" @pytest.fixture diff --git a/services/director-v2/tests/integration/02/test__dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test__dynamic_sidecar_nodeports_integration.py new file mode 100644 index 00000000000..6fcc6f117c5 --- /dev/null +++ b/services/director-v2/tests/integration/02/test__dynamic_sidecar_nodeports_integration.py @@ -0,0 +1,970 @@ +# pylint:disable=unused-argument +# pylint:disable=redefined-outer-name + +import asyncio +import hashlib +import json +import logging +import os +from asyncio import BaseEventLoop +from collections import namedtuple +from itertools import tee +from pathlib import Path +from pprint import pformat +from typing import ( + Any, + AsyncIterable, + Callable, + Dict, + Iterable, + Iterator, + List, + Set, + Tuple, + cast, +) +from uuid import uuid4 + +import aiodocker +import aiopg.sa +import httpx +import pytest +import sqlalchemy as sa +from aiodocker.containers import DockerContainer +from asgi_lifespan import LifespanManager +from fastapi import FastAPI +from models_library.projects import Node, ProjectAtDB, Workbench +from models_library.projects_pipeline import PipelineDetails +from models_library.projects_state import RunningState +from models_library.settings.rabbit import RabbitConfig +from models_library.settings.redis import RedisConfig +from py._path.local import LocalPath +from pytest_mock.plugin import MockerFixture +from shared_comp_utils import ( + assert_computation_task_out_obj, + assert_pipeline_status, + create_pipeline, +) +from simcore_postgres_database.models.comp_pipeline import comp_pipeline +from simcore_postgres_database.models.comp_tasks import comp_tasks +from simcore_sdk import node_ports_v2 +from simcore_sdk.node_data import data_manager +from simcore_sdk.node_ports_v2 import DBManager, Nodeports, Port +from 
simcore_service_director_v2.core.application import init_app +from simcore_service_director_v2.core.settings import AppSettings +from simcore_service_director_v2.models.schemas.comp_tasks import ComputationTaskOut +from simcore_service_director_v2.models.schemas.constants import ( + DYNAMIC_SIDECAR_SERVICE_PREFIX, +) +from starlette import status +from starlette.testclient import TestClient +from utils import ( + SEPARATOR, + assert_all_services_running, + assert_retrieve_service, + assert_services_reply_200, + assert_start_service, + assert_stop_service, + ensure_network_cleanup, + get_director_v0_patched_url, + is_legacy, + patch_dynamic_service_url, +) +from yarl import URL + +pytest_simcore_core_services_selection = [ + "postgres", + "redis", + "rabbit", + "storage", + "catalog", + "director", + "dask-scheduler", + "dask-sidecar", +] + +pytest_simcore_ops_services_selection = [ + "minio", + "adminer", +] + + +ServicesNodeUUIDs = namedtuple("ServicesNodeUUIDs", "sleeper, dy, dy_compose_spec") +InputsOutputs = namedtuple("InputsOutputs", "inputs, outputs") + +DY_SERVICES_STATE_PATH: Path = Path("/dy-volumes/workdir_generated-data") +TIMEOUT_DETECT_DYNAMIC_SERVICES_STOPPED = 60 +TIMEOUT_OUTPUTS_UPLOAD_FINISH_DETECTED = 60 +POSSIBLE_ISSUE_WORKAROUND = 10 + + +logger = logging.getLogger(__name__) + +# FIXTURES + + +@pytest.fixture +def minimal_configuration( # pylint:disable=too-many-arguments + sleeper_service: Dict, + dy_static_file_server_dynamic_sidecar_service: Dict, + dy_static_file_server_dynamic_sidecar_compose_spec_service: Dict, + redis_service: RedisConfig, + postgres_db: sa.engine.Engine, + postgres_host_config: Dict[str, str], + rabbit_service: RabbitConfig, + simcore_services: None, + dask_scheduler_service: None, + dask_sidecar_service: None, + ensure_swarm_and_networks: None, +) -> Iterator[None]: + with postgres_db.connect() as conn: + conn.execute(comp_tasks.delete()) + conn.execute(comp_pipeline.delete()) + yield + + +@pytest.fixture +def 
fake_dy_workbench( + mocks_dir: Path, + sleeper_service: Dict, + dy_static_file_server_dynamic_sidecar_service: Dict, + dy_static_file_server_dynamic_sidecar_compose_spec_service: Dict, +) -> Dict[str, Any]: + dy_workbench_template = mocks_dir / "fake_dy_workbench_template.json" + assert dy_workbench_template.exists() + + file_content = dy_workbench_template.read_text() + file_as_dict = json.loads(file_content) + + def _assert_version(registry_service_data: Dict) -> None: + key = registry_service_data["schema"]["key"] + version = registry_service_data["schema"]["version"] + found = False + for workbench_service_data in file_as_dict.values(): + if ( + workbench_service_data["key"] == key + and workbench_service_data["version"] == version + ): + found = True + break + + # when updating the services, this check will fail + # bump versions in the mocks if no breaking changes + # have been made + error_message = ( + f"Did not find service: key={key}, version={version}! in {file_as_dict}" + ) + assert found is True, error_message + + _assert_version(sleeper_service) + _assert_version(dy_static_file_server_dynamic_sidecar_service) + _assert_version(dy_static_file_server_dynamic_sidecar_compose_spec_service) + + return file_as_dict + + +@pytest.fixture +def fake_dy_success(mocks_dir: Path) -> Dict[str, Any]: + fake_dy_status_success = mocks_dir / "fake_dy_status_success.json" + assert fake_dy_status_success.exists() + return json.loads(fake_dy_status_success.read_text()) + + +@pytest.fixture +def fake_dy_published(mocks_dir: Path) -> Dict[str, Any]: + fake_dy_status_published = mocks_dir / "fake_dy_status_published.json" + assert fake_dy_status_published.exists() + return json.loads(fake_dy_status_published.read_text()) + + +@pytest.fixture +def services_node_uuids( + fake_dy_workbench: Dict[str, Any], + sleeper_service: Dict, + dy_static_file_server_dynamic_sidecar_service: Dict, + dy_static_file_server_dynamic_sidecar_compose_spec_service: Dict, +) -> ServicesNodeUUIDs: 
+ def _get_node_uuid(registry_service_data: Dict) -> str: + key = registry_service_data["schema"]["key"] + version = registry_service_data["schema"]["version"] + + for node_uuid, workbench_service_data in fake_dy_workbench.items(): + if ( + workbench_service_data["key"] == key + and workbench_service_data["version"] == version + ): + return node_uuid + + assert False, f"No node_uuid found for {key}:{version}" + + return ServicesNodeUUIDs( + sleeper=_get_node_uuid(sleeper_service), + dy=_get_node_uuid(dy_static_file_server_dynamic_sidecar_service), + dy_compose_spec=_get_node_uuid( + dy_static_file_server_dynamic_sidecar_compose_spec_service + ), + ) + + +@pytest.fixture +def current_study(project: Callable, fake_dy_workbench: Dict[str, Any]) -> ProjectAtDB: + return project(workbench=fake_dy_workbench) + + +@pytest.fixture +def workbench_dynamic_services( + current_study: ProjectAtDB, sleeper_service: Dict +) -> Dict[str, Node]: + sleeper_key = sleeper_service["schema"]["key"] + result = {k: v for k, v in current_study.workbench.items() if v.key != sleeper_key} + assert len(result) == 2 + return result + + +@pytest.fixture +async def db_manager(postgres_dsn: Dict[str, str]) -> AsyncIterable[DBManager]: + dsn = "postgresql://{user}:{password}@{host}:{port}/{database}".format( + **postgres_dsn + ) + async with aiopg.sa.create_engine(dsn) as db_engine: + yield DBManager(db_engine) + + +@pytest.fixture +async def fast_api_app( + minimal_configuration: None, network_name: str, monkeypatch +) -> FastAPI: + # Works as below line in docker.compose.yml + # ${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest} + + registry = os.environ.get("DOCKER_REGISTRY", "local") + image_tag = os.environ.get("DOCKER_IMAGE_TAG", "production") + + image_name = f"{registry}/dynamic-sidecar:{image_tag}" + + logger.warning("Patching to: DYNAMIC_SIDECAR_IMAGE=%s", image_name) + monkeypatch.setenv("DYNAMIC_SIDECAR_IMAGE", image_name) + 
monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") + monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") + + monkeypatch.setenv("SC_BOOT_MODE", "production") + monkeypatch.setenv("DYNAMIC_SIDECAR_EXPOSE_PORT", "true") + monkeypatch.setenv("PROXY_EXPOSE_PORT", "true") + monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", network_name) + monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False) + monkeypatch.setenv("DIRECTOR_V2_DYNAMIC_SCHEDULER_ENABLED", "true") + monkeypatch.setenv("DIRECTOR_V2_CELERY_SCHEDULER_ENABLED", "false") + monkeypatch.setenv("DYNAMIC_SIDECAR_TRAEFIK_ACCESS_LOG", "true") + monkeypatch.setenv("DYNAMIC_SIDECAR_TRAEFIK_LOGLEVEL", "debug") + + settings = AppSettings.create_from_envs() + + app = init_app(settings) + return app + + +@pytest.fixture +async def director_v2_client( + loop: BaseEventLoop, fast_api_app: FastAPI +) -> AsyncIterable[httpx.AsyncClient]: + async with LifespanManager(fast_api_app): + async with httpx.AsyncClient( + app=fast_api_app, base_url="http://testserver/v2" + ) as client: + yield client + + +@pytest.fixture +def client(fast_api_app: FastAPI) -> TestClient: + """required to avoid rewriting existing code""" + return TestClient(fast_api_app, raise_server_exceptions=True) + + +@pytest.fixture +async def cleanup_services_and_networks( + workbench_dynamic_services: Dict[str, Node], + current_study: ProjectAtDB, + director_v2_client: httpx.AsyncClient, +) -> AsyncIterable[None]: + yield None + # ensure service cleanup when done testing + async with aiodocker.Docker() as docker_client: + service_names = {x["Spec"]["Name"] for x in await docker_client.services.list()} + + # grep the names of the services + for node_uuid in workbench_dynamic_services: + for service_name in service_names: + # if node_uuid is present in the service name it needs to be removed + if node_uuid in service_name: + delete_result = await docker_client.services.delete(service_name) + assert delete_result is True + + 
project_id = f"{current_study.uuid}" + + # pylint: disable=protected-access + scheduler_interval = ( + director_v2_client._transport.app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS + ) + # sleep enough to ensure the observation cycle properly stopped the service + await asyncio.sleep(2 * scheduler_interval) + await ensure_network_cleanup(docker_client, project_id) + + +@pytest.fixture +def temp_dir(tmpdir: LocalPath) -> Path: + return Path(tmpdir) + + +# UTILS + + +async def _get_mapped_nodeports_values( + user_id: int, project_id: str, workbench: Workbench, db_manager: DBManager +) -> Dict[str, InputsOutputs]: + result: Dict[str, InputsOutputs] = {} + + for node_uuid in workbench: + PORTS: Nodeports = await node_ports_v2.ports( + user_id=user_id, + project_id=project_id, + node_uuid=str(node_uuid), + db_manager=db_manager, + ) + result[str(node_uuid)] = InputsOutputs( + inputs={ + node_input.key: node_input + for node_input in (await PORTS.inputs).values() + }, + outputs={ + node_output.key: node_output + for node_output in (await PORTS.outputs).values() + }, + ) + + return result + + +def _print_values_to_assert(**kwargs) -> None: + print("Values to assert", ", ".join(f"{k}={v}" for k, v in kwargs.items())) + + +async def _assert_port_values( + mapped: Dict[str, InputsOutputs], + services_node_uuids: ServicesNodeUUIDs, +): + print("Nodeport mapped values") + for node_uuid, inputs_outputs in mapped.items(): + print("Port values for", node_uuid) + print("INPUTS") + for value in inputs_outputs.inputs.values(): + print(value.key, value) + print("OUTPUTS") + for value in inputs_outputs.outputs.values(): + print(value.key, value) + + # integer values + sleeper_out_2 = await mapped[services_node_uuids.sleeper].outputs["out_2"].get() + dy_integer_intput = ( + await mapped[services_node_uuids.dy].inputs["integer_input"].get() + ) + dy_integer_output = ( + await 
mapped[services_node_uuids.dy].outputs["integer_output"].get() + ) + + dy_compose_spec_integer_intput = ( + await mapped[services_node_uuids.dy_compose_spec].inputs["integer_input"].get() + ) + dy_compose_spec_integer_output = ( + await mapped[services_node_uuids.dy_compose_spec] + .outputs["integer_output"] + .get() + ) + + _print_values_to_assert( + sleeper_out_2=sleeper_out_2, + dy_integer_intput=dy_integer_intput, + dy_integer_output=dy_integer_output, + dy_compose_spec_integer_intput=dy_compose_spec_integer_intput, + dy_compose_spec_integer_output=dy_compose_spec_integer_output, + ) + + assert sleeper_out_2 == dy_integer_intput + assert sleeper_out_2 == dy_integer_output + assert sleeper_out_2 == dy_compose_spec_integer_intput + assert sleeper_out_2 == dy_compose_spec_integer_output + + # files + + async def _int_value_port(port: Port) -> int: + file_path: Path = cast(Path, await port.get()) + int_value = int(file_path.read_text()) + return int_value + + sleeper_out_1 = await _int_value_port( + mapped[services_node_uuids.sleeper].outputs["out_1"] + ) + + dy_file_input = await _int_value_port( + mapped[services_node_uuids.dy].inputs["file_input"] + ) + dy_file_output = await _int_value_port( + mapped[services_node_uuids.dy].outputs["file_output"] + ) + + dy_compose_spec_file_input = await _int_value_port( + mapped[services_node_uuids.dy_compose_spec].inputs["file_input"] + ) + dy_compose_spec_file_output = await _int_value_port( + mapped[services_node_uuids.dy_compose_spec].outputs["file_output"] + ) + + _print_values_to_assert( + sleeper_out_1=sleeper_out_1, + dy_file_input=dy_file_input, + dy_file_output=dy_file_output, + dy_compose_spec_file_input=dy_compose_spec_file_input, + dy_compose_spec_file_output=dy_compose_spec_file_output, + ) + + assert sleeper_out_1 == dy_file_input + assert sleeper_out_1 == dy_file_output + assert sleeper_out_1 == dy_compose_spec_file_input + assert sleeper_out_1 == dy_compose_spec_file_output + + +def 
_patch_postgres_address(director_v2_client: httpx.AsyncClient) -> None: + # the dynamic-sidecar cannot reach postgres via port + # forwarding to localhost. the docker postgres host must be used + + # pylint: disable=protected-access + director_v2_client._transport.app.state.settings.POSTGRES.__config__.allow_mutation = ( + True + ) + director_v2_client._transport.app.state.settings.POSTGRES.__config__.frozen = False + director_v2_client._transport.app.state.settings.POSTGRES.POSTGRES_HOST = "postgres" + + +def _assert_command_successful(command: str) -> None: + print(command) + assert os.system(command) == 0 + + +async def _container_id_via_services(service_uuid: str) -> str: + container_id = None + + service_name = f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}_{service_uuid}" + async with aiodocker.Docker() as docker_client: + service_id = None + for service in await docker_client.services.list(): + if service["Spec"]["Name"] == service_name: + service_id = service["ID"] + break + assert ( + service_id is not None + ), f"No service found for service name: {service_name}" + + for task in await docker_client.tasks.list(): + if task["ServiceID"] == service_id: + assert task["Status"]["State"] == "running" + container_id = task["Status"]["ContainerStatus"]["ContainerID"] + break + + assert ( + container_id is not None + ), f"No container found for service name {service_name}" + + return container_id + + +async def _fetch_data_from_container( + dir_tag: str, service_uuid: str, temp_dir: Path +) -> Path: + container_id = await _container_id_via_services(service_uuid) + + target_path = temp_dir / f"container_{dir_tag}_{uuid4()}" + target_path.mkdir(parents=True, exist_ok=True) + + _assert_command_successful( + f"docker cp {container_id}:/{DY_SERVICES_STATE_PATH}/. 
{target_path}" + ) + + return target_path + + +async def _fetch_data_via_data_manager( + dir_tag: str, user_id: int, project_id: str, service_uuid: str, temp_dir: Path +) -> Path: + save_to = temp_dir / f"data-manager_{dir_tag}_{uuid4()}" + save_to.mkdir(parents=True, exist_ok=True) + + assert ( + await data_manager.is_file_present_in_storage( + user_id=user_id, + project_id=project_id, + node_uuid=service_uuid, + file_path=DY_SERVICES_STATE_PATH, + ) + is True + ) + + await data_manager.pull( + user_id=user_id, + project_id=project_id, + node_uuid=service_uuid, + file_or_folder=DY_SERVICES_STATE_PATH, + save_to=save_to, + ) + + return save_to + + +async def _wait_for_dynamic_services_to_be_running( + director_v2_client: httpx.AsyncClient, + director_v0_url: URL, + user_id: int, + workbench_dynamic_services: Dict[str, Node], + current_study: ProjectAtDB, +) -> Dict[str, str]: + # start dynamic services + await asyncio.gather( + *( + assert_start_service( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + user_id=user_id, + project_id=str(current_study.uuid), + service_key=node.key, + service_version=node.version, + service_uuid=service_uuid, + basepath=f"/x/{service_uuid}" if is_legacy(node) else None, + ) + for service_uuid, node in workbench_dynamic_services.items() + ) + ) + + dynamic_services_urls: Dict[str, str] = {} + + for service_uuid in workbench_dynamic_services: + dynamic_service_url = await patch_dynamic_service_url( + # pylint: disable=protected-access + app=director_v2_client._transport.app, + node_uuid=service_uuid, + ) + dynamic_services_urls[service_uuid] = dynamic_service_url + + await assert_all_services_running( + director_v2_client, director_v0_url, workbench=workbench_dynamic_services + ) + + await assert_services_reply_200( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + workbench=workbench_dynamic_services, + ) + + return dynamic_services_urls + + +async def 
_wait_for_dy_services_to_fully_stop( + director_v2_client: httpx.AsyncClient, +) -> None: + # pylint: disable=protected-access + to_observe = ( + director_v2_client._transport.app.state.dynamic_sidecar_scheduler._to_observe + ) + + for i in range(TIMEOUT_DETECT_DYNAMIC_SERVICES_STOPPED): + print( + ( + f"Sleeping for {i+1}/{TIMEOUT_DETECT_DYNAMIC_SERVICES_STOPPED} " + "seconds while waiting for removal of all dynamic-sidecars" + ) + ) + await asyncio.sleep(1) + if len(to_observe) == 0: + break + + if i == TIMEOUT_DETECT_DYNAMIC_SERVICES_STOPPED - 1: + assert False, "Timeout reached" + + +def _pairwise(iterable) -> Iterable[Tuple[Any, Any]]: + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + +def _assert_same_set(*sets_to_compare: Set[Any]) -> None: + for first, second in _pairwise(sets_to_compare): + assert first == second + + +def _get_file_hashes_in_path(path_to_hash: Path) -> Set[Tuple[Path, str]]: + def _hash_path(path: Path): + sha256_hash = hashlib.sha256() + with open(path, "rb") as f: + # Read and update hash string value in blocks of 4K + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + + def _relative_path(root_path: Path, full_path: Path) -> Path: + return full_path.relative_to(root_path) + + if path_to_hash.is_file(): + return {(_relative_path(path_to_hash, path_to_hash), _hash_path(path_to_hash))} + + return { + (_relative_path(path_to_hash, path), _hash_path(path)) + for path in path_to_hash.rglob("*") + } + + +LINE_PARTS_TO_MATCH = [ + (0, "INFO:simcore_service_dynamic_sidecar.modules.nodeports:Uploaded"), + (2, "bytes"), + (3, "in"), + (5, "seconds"), +] + + +def _is_matching_line_in_logs(logs: List[str]) -> bool: + for line in logs: + if LINE_PARTS_TO_MATCH[0][1] in line: + print("".join(logs)) + + line_parts = line.strip().split(" ") + for position, value in LINE_PARTS_TO_MATCH: + assert line_parts[position] == value + + return 
True + return False + + +async def _print_dynamic_sidecars_containers_logs_and_get_containers( + dynamic_services_urls: Dict[str, str] +) -> List[str]: + containers_names: List[str] = [] + for node_uuid, url in dynamic_services_urls.items(): + print(f"Containers logs for service {node_uuid} @ {url}") + async with httpx.AsyncClient(base_url=f"{url}/v1") as client: + containers_inspect_response = await client.get("/containers") + assert ( + containers_inspect_response.status_code == status.HTTP_200_OK + ), containers_inspect_response.text + containers_inspect = containers_inspect_response.json() + + # pylint: disable=unnecessary-comprehension + service_containers_names = [x for x in containers_inspect] + print("Containers:", service_containers_names) + for container_name in service_containers_names: + containers_names.append(container_name) + print(f"Fetching logs for {container_name}") + container_logs_response = await client.get( + f"/containers/{container_name}/logs" + ) + assert container_logs_response.status_code == status.HTTP_200_OK + logs = "".join(container_logs_response.json()) + print(f"Container {container_name} logs:\n{logs}") + + assert len(containers_names) == 3 + return containers_names + + +async def _print_container_inspect(container_id: str) -> None: + async with aiodocker.Docker() as docker_client: + container = await docker_client.containers.get(container_id) + container_inspect = await container.show() + print(f"Container {container_id} inspect:\n{pformat(container_inspect)}") + + +async def _print_all_docker_volumes() -> None: + async with aiodocker.Docker() as docker_client: + docker_volumes = await docker_client.volumes.list() + print(f"Detected volumes:\n{pformat(docker_volumes)}") + + +async def _assert_retrieve_completed( + director_v2_client: httpx.AsyncClient, + director_v0_url: URL, + service_uuid: str, + dynamic_services_urls: Dict[str, str], +) -> None: + await assert_retrieve_service( + director_v2_client=director_v2_client, + 
director_v0_url=director_v0_url, + service_uuid=service_uuid, + ) + + container_id = await _container_id_via_services(service_uuid) + + # look at dynamic-sidecar's logs to be sure when nodeports + # have been uploaded + async with aiodocker.Docker() as docker_client: + container: DockerContainer = await docker_client.containers.get(container_id) + + for i in range(TIMEOUT_OUTPUTS_UPLOAD_FINISH_DETECTED): + logs = await container.log(stdout=True, stderr=True) + + if _is_matching_line_in_logs(logs): + break + + if i == TIMEOUT_OUTPUTS_UPLOAD_FINISH_DETECTED - 1: + print(SEPARATOR) + print(f"Dumping information for service_uuid={service_uuid}") + print(SEPARATOR) + + print("".join(logs)) + print(SEPARATOR) + + containers_names = ( + await _print_dynamic_sidecars_containers_logs_and_get_containers( + dynamic_services_urls + ) + ) + print(SEPARATOR) + + # inspect dynamic-sidecar container + await _print_container_inspect(container_id=container_id) + print(SEPARATOR) + + # inspect spawned container + for container_name in containers_names: + await _print_container_inspect(container_id=container_name) + print(SEPARATOR) + + await _print_all_docker_volumes() + print(SEPARATOR) + + assert False, "Timeout reached" + + print( + ( + f"Sleeping {i+1}/{TIMEOUT_OUTPUTS_UPLOAD_FINISH_DETECTED} " + f"before searching logs from {service_uuid} again" + ) + ) + await asyncio.sleep(1) + + print(f"Nodeports outputs upload finish detected for {service_uuid}") + + +# TESTS + + +async def test_nodeports_integration( + # pylint: disable=too-many-arguments + minimal_configuration: None, + cleanup_services_and_networks: None, + update_project_workbench_with_comp_tasks: Callable, + client: TestClient, + db_manager: DBManager, + user_db: Dict, + current_study: ProjectAtDB, + services_endpoint: Dict[str, URL], + director_v2_client: httpx.AsyncClient, + workbench_dynamic_services: Dict[str, Node], + services_node_uuids: ServicesNodeUUIDs, + fake_dy_success: Dict[str, Any], + fake_dy_published: 
Dict[str, Any], + temp_dir: Path, + mocker: MockerFixture, +) -> None: + """ + Creates a new project with where the following connections + are defined: `sleeper:1.0.0` -> + `dy-static-file-server-dynamic-sidecar:2.0.0` -> + `dy-static-file-server-dynamic-sidecar-compose-spec:2.0.0`. + + Both `dy-static-file-server-*` services are able to map the + inputs of the service to the outputs. Both services also + generate an internal state which is to be persisted + between runs. + + Execution steps: + 1. start all the dynamic services and make sure they are running + 2. run the computational pipeline & trigger port retrievals + 3. check that the outputs of the `sleeper` are the same as the + outputs of the `dy-static-file-server-dynamic-sidecar-compose-spec`` + 4. fetch the "state" via `docker ` for both dynamic services + 5. start the dynamic-services and fetch the "state" via + `storage-data_manager API` for both dynamic services + 6. start the dynamic-services again, fetch the "state" via + `docker` for both dynamic services + 7. 
finally check that all states for both dynamic services match + """ + director_v0_url = get_director_v0_patched_url(services_endpoint["director"]) + + # STEP 1 + + _patch_postgres_address(director_v2_client) + + dynamic_services_urls: Dict[ + str, str + ] = await _wait_for_dynamic_services_to_be_running( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + user_id=user_db["id"], + workbench_dynamic_services=workbench_dynamic_services, + current_study=current_study, + ) + + # STEP 2 + + response = create_pipeline( + client, + project=current_study, + user_id=user_db["id"], + start_pipeline=True, + expected_response_status_code=status.HTTP_201_CREATED, + ) + task_out = ComputationTaskOut.parse_obj(response.json()) + + # check the contents is correct: a pipeline that just started gets PUBLISHED + assert_computation_task_out_obj( + client, + task_out, + project=current_study, + exp_task_state=RunningState.PUBLISHED, + exp_pipeline_details=PipelineDetails.parse_obj(fake_dy_published), + ) + + # wait for the computation to start + assert_pipeline_status( + client, + task_out.url, + user_db["id"], + current_study.uuid, + wait_for_states=[RunningState.STARTED], + ) + + # wait for the computation to finish (either by failing, success or abort) + task_out = assert_pipeline_status( + client, task_out.url, user_db["id"], current_study.uuid + ) + + assert_computation_task_out_obj( + client, + task_out, + project=current_study, + exp_task_state=RunningState.SUCCESS, + exp_pipeline_details=PipelineDetails.parse_obj(fake_dy_success), + ) + + update_project_workbench_with_comp_tasks(str(current_study.uuid)) + + # Trigger inputs pulling & outputs pushing on dynamic services + + # Since there is no webserver monitoring postgres notifications + # trigger the call manually + + # dump logs form started containers before retrieve + await _print_dynamic_sidecars_containers_logs_and_get_containers( + dynamic_services_urls + ) + + await _assert_retrieve_completed( + 
director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + service_uuid=services_node_uuids.dy, + dynamic_services_urls=dynamic_services_urls, + ) + + await _assert_retrieve_completed( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + service_uuid=services_node_uuids.dy_compose_spec, + dynamic_services_urls=dynamic_services_urls, + ) + + # STEP 3 + # pull data via nodeports + + # storage config.py resolves env vars at import time, unlike newer settingslib + # configuration. patching the module with the correct url + mocker.patch( + "simcore_sdk.node_ports_common.config.STORAGE_ENDPOINT", + str(services_endpoint["storage"]).replace("http://", ""), + ) + + mapped_nodeports_values = await _get_mapped_nodeports_values( + user_db["id"], str(current_study.uuid), current_study.workbench, db_manager + ) + await _assert_port_values(mapped_nodeports_values, services_node_uuids) + + # STEP 4 + + dy_path_container_before = await _fetch_data_from_container( + dir_tag="dy", service_uuid=services_node_uuids.dy, temp_dir=temp_dir + ) + dy_compose_spec_path_container_before = await _fetch_data_from_container( + dir_tag="dy_compose_spec", + service_uuid=services_node_uuids.dy_compose_spec, + temp_dir=temp_dir, + ) + + # STEP 5 + + # stop the services to make sure the data is saved to storage + await asyncio.gather( + *( + assert_stop_service( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + service_uuid=service_uuid, + ) + for service_uuid in workbench_dynamic_services + ) + ) + + await _wait_for_dy_services_to_fully_stop(director_v2_client) + + dy_path_data_manager_before = await _fetch_data_via_data_manager( + dir_tag="dy", + user_id=user_db["id"], + project_id=str(current_study.uuid), + service_uuid=services_node_uuids.dy, + temp_dir=temp_dir, + ) + + dy_compose_spec_path_data_manager_before = await _fetch_data_via_data_manager( + dir_tag="dy_compose_spec", + user_id=user_db["id"], + 
project_id=str(current_study.uuid), + service_uuid=services_node_uuids.dy_compose_spec, + temp_dir=temp_dir, + ) + + # STEP 6 + + await _wait_for_dynamic_services_to_be_running( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + user_id=user_db["id"], + workbench_dynamic_services=workbench_dynamic_services, + current_study=current_study, + ) + + dy_path_container_after = await _fetch_data_from_container( + dir_tag="dy", service_uuid=services_node_uuids.dy, temp_dir=temp_dir + ) + dy_compose_spec_path_container_after = await _fetch_data_from_container( + dir_tag="dy_compose_spec", + service_uuid=services_node_uuids.dy_compose_spec, + temp_dir=temp_dir, + ) + + # STEP 7 + + _assert_same_set( + _get_file_hashes_in_path(dy_path_container_before), + _get_file_hashes_in_path(dy_path_data_manager_before), + _get_file_hashes_in_path(dy_path_container_after), + ) + + _assert_same_set( + _get_file_hashes_in_path(dy_compose_spec_path_container_before), + _get_file_hashes_in_path(dy_compose_spec_path_data_manager_before), + _get_file_hashes_in_path(dy_compose_spec_path_container_after), + ) diff --git a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py index a8adda7b925..e900c87d8f7 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py +++ b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py @@ -12,6 +12,7 @@ from async_asgi_testclient.response import Response from async_timeout import timeout from pydantic import PositiveInt +from pytest_mock.plugin import MockerFixture from simcore_service_director_v2.core.application import init_app from simcore_service_director_v2.core.settings import AppSettings from utils import ensure_network_cleanup, patch_dynamic_service_url @@ -100,7 +101,9 @@ async def test_client( @pytest.fixture -async def ensure_services_stopped(start_request_data: Dict[str, Any]) -> 
None: +async def ensure_services_stopped( + start_request_data: Dict[str, Any], test_client: TestClient +) -> None: yield # ensure service cleanup when done testing async with aiodocker.Docker() as docker_client: @@ -113,14 +116,39 @@ async def ensure_services_stopped(start_request_data: Dict[str, Any]) -> None: delete_result = await docker_client.services.delete(service_name) assert delete_result is True + scheduler_interval = ( + test_client.application.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS + ) + # sleep enough to ensure the observation cycle properly stopped the service + await asyncio.sleep(2 * scheduler_interval) + await ensure_network_cleanup(docker_client, project_id) +@pytest.fixture +def mock_service_state(mocker: MockerFixture) -> None: + """because the monitor is disabled some functionality needs to be mocked""" + + mocker.patch( + "simcore_service_director_v2.modules.dynamic_sidecar.client_api.DynamicSidecarClient.service_save_state", + side_effect=lambda *args, **kwargs: None, + ) + + mocker.patch( + "simcore_service_director_v2.modules.dynamic_sidecar.client_api.DynamicSidecarClient.service_restore_state", + side_effect=lambda *args, **kwargs: None, + ) + + +# TESTS + + async def test_start_status_stop( test_client: TestClient, node_uuid: str, start_request_data: Dict[str, Any], ensure_services_stopped: None, + mock_service_state: None, ): # starting the service headers = { diff --git a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py index 9d720f01d42..fb17f7d996b 100644 --- a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py +++ b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py @@ -3,29 +3,31 @@ # pylint:disable=too-many-arguments import asyncio +import logging import os -from 
typing import Any, Callable, Dict, Optional, Union +from typing import Callable, Dict from uuid import uuid4 import aiodocker import httpx import pytest -import requests import sqlalchemy as sa -import tenacity from asgi_lifespan import LifespanManager -from async_timeout import timeout -from models_library.projects import Node, ProjectAtDB +from models_library.projects import ProjectAtDB from models_library.settings.rabbit import RabbitConfig from models_library.settings.redis import RedisConfig -from pydantic.types import PositiveInt from simcore_service_director_v2.core.application import init_app from simcore_service_director_v2.core.settings import AppSettings -from simcore_service_director_v2.models.schemas.constants import ( - DYNAMIC_PROXY_SERVICE_PREFIX, - DYNAMIC_SIDECAR_SERVICE_PREFIX, +from utils import ( + assert_all_services_running, + assert_services_reply_200, + assert_start_service, + assert_stop_service, + ensure_network_cleanup, + get_director_v0_patched_url, + is_legacy, + patch_dynamic_service_url, ) -from utils import ensure_network_cleanup, patch_dynamic_service_url from yarl import URL pytest_simcore_core_services_selection = [ @@ -34,10 +36,15 @@ "rabbit", "catalog", "director", + "storage", ] -HTTPX_CLIENT_TIMOUT = 10 -SERVICES_ARE_READY_TIMEOUT = 10 * 60 +pytest_simcore_ops_services_selection = [ + "minio", +] + +logger = logging.getLogger(__name__) + # FIXTURES @@ -120,12 +127,25 @@ def _assemble_node_data(spec: Dict, label: str) -> Dict[str, str]: async def director_v2_client( minimal_configuration: None, loop: asyncio.BaseEventLoop, - mock_env: None, network_name: str, monkeypatch, ) -> httpx.AsyncClient: + # Works as below line in docker.compose.yml + # ${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest} + + registry = os.environ.get("DOCKER_REGISTRY", "local") + image_tag = os.environ.get("DOCKER_IMAGE_TAG", "production") + + image_name = f"{registry}/dynamic-sidecar:{image_tag}" + + 
logger.warning("Patching to: DYNAMIC_SIDECAR_IMAGE=%s", image_name) + monkeypatch.setenv("DYNAMIC_SIDECAR_IMAGE", image_name) + monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") + monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") + monkeypatch.setenv("SC_BOOT_MODE", "production") monkeypatch.setenv("DYNAMIC_SIDECAR_EXPOSE_PORT", "true") + monkeypatch.setenv("PROXY_EXPOSE_PORT", "true") monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", network_name) monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False) monkeypatch.setenv("DIRECTOR_V2_DYNAMIC_SCHEDULER_ENABLED", "true") @@ -151,7 +171,9 @@ async def director_v2_client( @pytest.fixture -async def ensure_services_stopped(dy_static_file_server_project: ProjectAtDB) -> None: +async def ensure_services_stopped( + dy_static_file_server_project: ProjectAtDB, director_v2_client: httpx.AsyncClient +) -> None: yield # ensure service cleanup when done testing async with aiodocker.Docker() as docker_client: @@ -166,192 +188,17 @@ async def ensure_services_stopped(dy_static_file_server_project: ProjectAtDB) -> assert delete_result is True project_id = f"{dy_static_file_server_project.uuid}" - await ensure_network_cleanup(docker_client, project_id) - -# UTILS - - -async def _handle_307_if_required( - director_v2_client: httpx.AsyncClient, director_v0_url: URL, result: httpx.Response -) -> Union[httpx.Response, requests.Response]: - def _debug_print( - result: Union[httpx.Response, requests.Response], heading_text: str - ) -> None: - print( - ( - f"{heading_text}\n>>>\n{result.request.method}\n" - f"{result.request.url}\n{result.request.headers}\n" - f"<<<\n{result.status_code}\n{result.headers}\n{result.text}\n" - ) + # pylint: disable=protected-access + scheduler_interval = ( + director_v2_client._transport.app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS ) - - if result.next_request is not None: - _debug_print(result, "REDIRECTING[1/2] 
DV2") - - # replace url endpoint for director-v0 in redirect - result.next_request.url = httpx.URL( - str(result.next_request.url).replace( - "http://director:8080", str(director_v0_url) - ) - ) - - # when both director-v0 and director-v2 were running in containers - # it was possible to use httpx for GET requests as well - # since director-v2 is now started on the host directly, - # a 405 Method Not Allowed is returned - # using requests is workaround for the issue - if result.request.method == "GET": - redirect_result = requests.get(str(result.next_request.url)) - else: - redirect_result = await director_v2_client.send(result.next_request) - - _debug_print(redirect_result, "REDIRECTING[2/2] DV0") - - return redirect_result - - return result - - -async def _assert_start_service( - director_v2_client: httpx.AsyncClient, - director_v0_url: URL, - user_id: int, - project_id: str, - service_key: str, - service_version: str, - service_uuid: str, - basepath: Optional[str], -) -> None: - data = dict( - user_id=user_id, - project_id=project_id, - service_key=service_key, - service_version=service_version, - service_uuid=service_uuid, - basepath=basepath, - ) - headers = { - "x-dynamic-sidecar-request-dns": director_v2_client.base_url.host, - "x-dynamic-sidecar-request-scheme": director_v2_client.base_url.scheme, - } - - result = await director_v2_client.post( - "/dynamic_services", json=data, headers=headers, allow_redirects=False - ) - result = await _handle_307_if_required(director_v2_client, director_v0_url, result) - assert result.status_code == 201, result.text - - -def _is_legacy(node_data: Node) -> bool: - return node_data.label == "LEGACY" - - -async def _get_service_data( - director_v2_client: httpx.AsyncClient, - director_v0_url: URL, - service_uuid: str, - node_data: Node, -) -> Dict[str, Any]: - result = await director_v2_client.get( - f"/dynamic_services/{service_uuid}", allow_redirects=False - ) - result = await _handle_307_if_required(director_v2_client, 
director_v0_url, result) - assert result.status_code == 200, result.text - - payload = result.json() - data = payload["data"] if _is_legacy(node_data) else payload - return data - - -async def _get_service_state( - director_v2_client: httpx.AsyncClient, - director_v0_url: URL, - service_uuid: str, - node_data: Node, -) -> str: - data = await _get_service_data( - director_v2_client, director_v0_url, service_uuid, node_data - ) - print("STATUS_RESULT", node_data.label, data["service_state"]) - return data["service_state"] - - -async def _assert_stop_service( - director_v2_client: httpx.AsyncClient, director_v0_url: URL, service_uuid: str -) -> None: - result = await director_v2_client.delete( - f"/dynamic_services/{service_uuid}", allow_redirects=False - ) - result = await _handle_307_if_required(director_v2_client, director_v0_url, result) - assert result.status_code == 204 - assert result.text == "" - - -def _run_command(command: str) -> str: - # using asyncio.create_subprocess_shell is slower - # and sometimes ir randomly hangs forever - - print(f"Running: '{command}'") - command_result = os.popen(command).read() - print(command_result) - return command_result - - -async def _port_forward_service( - service_name: str, is_legacy: bool, internal_port: PositiveInt -) -> PositiveInt: - """Updates the service configuration and makes it so it can be used""" - # By updating the service spec the container will be recreated. 
- # It works in this case, since we do not care about the internal - # state of the application - target_service = service_name - - if is_legacy: - # Legacy services are started --endpoint-mode dnsrr, it needs to - # be changed to vip otherwise the port forward will not work - result = _run_command( - f"docker service update {service_name} --endpoint-mode=vip" - ) - assert "verify: Service converged" in result - else: - # For a non legacy service, the service_name points to the dynamic-sidecar, - # but traffic is handeled by the proxy, - target_service = service_name.replace( - DYNAMIC_SIDECAR_SERVICE_PREFIX, DYNAMIC_PROXY_SERVICE_PREFIX - ) - - # Finally forward the port on a random assigned port. - result = _run_command( - f"docker service update {target_service} --publish-add :{internal_port}" - ) - assert "verify: Service converged" in result - - # inspect service and fetch the port - async with aiodocker.Docker() as docker_client: - service_details = await docker_client.services.inspect(target_service) - ports = service_details["Endpoint"]["Ports"] - - assert len(ports) == 1, service_details - exposed_port = ports[0]["PublishedPort"] - return exposed_port - - -async def _assert_service_is_available(exposed_port: PositiveInt) -> None: - service_address = f"http://172.17.0.1:{exposed_port}" - print(f"checking service @ {service_address}") - - async for attempt in tenacity.AsyncRetrying( - wait=tenacity.wait_exponential(), stop=tenacity.stop_after_delay(15) - ): - with attempt: - async with httpx.AsyncClient() as client: - response = await client.get(service_address) - assert response.status_code == 200 + # sleep enough to ensure the observation cycle properly stopped the service + await asyncio.sleep(2 * scheduler_interval) + await ensure_network_cleanup(docker_client, project_id) -def _get_director_v0_patched_url(url: URL) -> URL: - return URL(str(url).replace("127.0.0.1", "172.17.0.1")) +# UTILS # TESTS @@ -373,12 +220,11 @@ async def 
test_legacy_and_dynamic_sidecar_run( - dy-static-file-server-dynamic-sidecar - dy-static-file-server-dynamic-sidecar-compose """ - director_v0_url = _get_director_v0_patched_url(services_endpoint["director"]) + director_v0_url = get_director_v0_patched_url(services_endpoint["director"]) - services_to_start = [] - for service_uuid, node in dy_static_file_server_project.workbench.items(): - services_to_start.append( - _assert_start_service( + await asyncio.gather( + *( + assert_start_service( director_v2_client=director_v2_client, director_v0_url=director_v0_url, user_id=user_db["id"], @@ -386,13 +232,14 @@ async def test_legacy_and_dynamic_sidecar_run( service_key=node.key, service_version=node.version, service_uuid=service_uuid, - basepath=f"/x/{service_uuid}" if _is_legacy(node) else None, + basepath=f"/x/{service_uuid}" if is_legacy(node) else None, ) + for service_uuid, node in dy_static_file_server_project.workbench.items() ) - await asyncio.gather(*services_to_start) + ) for service_uuid, node in dy_static_file_server_project.workbench.items(): - if _is_legacy(node): + if is_legacy(node): continue await patch_dynamic_service_url( @@ -403,59 +250,27 @@ async def test_legacy_and_dynamic_sidecar_run( assert len(dy_static_file_server_project.workbench) == 3 - async with timeout(SERVICES_ARE_READY_TIMEOUT): - not_all_services_running = True - - while not_all_services_running: - service_states = [ - _get_service_state( - director_v2_client=director_v2_client, - director_v0_url=director_v0_url, - service_uuid=dynamic_service_uuid, - node_data=node_data, - ) - for dynamic_service_uuid, node_data in dy_static_file_server_project.workbench.items() - ] - are_services_running = [ - x == "running" for x in await asyncio.gather(*service_states) - ] - not_all_services_running = not all(are_services_running) - # let the services boot - await asyncio.sleep(1.0) + await assert_all_services_running( + director_v2_client, + director_v0_url, + 
workbench=dy_static_file_server_project.workbench, + ) # query the service directly and check if it responding accordingly - for ( - dynamic_service_uuid, - node_data, - ) in dy_static_file_server_project.workbench.items(): - service_data = await _get_service_data( - director_v2_client=director_v2_client, - director_v0_url=director_v0_url, - service_uuid=dynamic_service_uuid, - node_data=node_data, - ) - print( - "Checking running service availability", - dynamic_service_uuid, - node_data, - service_data, - ) - exposed_port = await _port_forward_service( - service_name=service_data["service_host"], - is_legacy=_is_legacy(node_data), - internal_port=service_data["service_port"], - ) - - await _assert_service_is_available(exposed_port) + await assert_services_reply_200( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + workbench=dy_static_file_server_project.workbench, + ) # finally stop the started services - services_to_stop = [] - for service_uuid in dy_static_file_server_project.workbench: - services_to_stop.append( - _assert_stop_service( + await asyncio.gather( + *( + assert_stop_service( director_v2_client=director_v2_client, director_v0_url=director_v0_url, service_uuid=service_uuid, ) + for service_uuid in dy_static_file_server_project.workbench ) - await asyncio.gather(*services_to_stop) + ) diff --git a/services/director-v2/tests/integration/02/utils.py b/services/director-v2/tests/integration/02/utils.py index 3bafb52fc12..67d623c144d 100644 --- a/services/director-v2/tests/integration/02/utils.py +++ b/services/director-v2/tests/integration/02/utils.py @@ -1,16 +1,40 @@ +# pylint: disable=redefined-outer-name + import asyncio +import json +import os +from typing import Any, Dict, Optional, Union import aiodocker +import httpx +import requests from async_timeout import timeout from fastapi import FastAPI +from models_library.projects import Node +from pydantic import PositiveInt from 
simcore_service_director_v2.models.schemas.constants import ( + DYNAMIC_PROXY_SERVICE_PREFIX, DYNAMIC_SIDECAR_SERVICE_PREFIX, ) from simcore_service_director_v2.modules.dynamic_sidecar.scheduler import ( DynamicSidecarsScheduler, ) +from tenacity._asyncio import AsyncRetrying +from tenacity.stop import stop_after_attempt +from tenacity.wait import wait_fixed +from yarl import URL SERVICE_WAS_CREATED_BY_DIRECTOR_V2 = 20 +SERVICES_ARE_READY_TIMEOUT = 10 * 60 +SEPARATOR = "=" * 50 + + +def is_legacy(node_data: Node) -> bool: + return node_data.label == "LEGACY" + + +def get_director_v0_patched_url(url: URL) -> URL: + return URL(str(url).replace("127.0.0.1", "172.17.0.1")) async def ensure_network_cleanup( @@ -28,13 +52,13 @@ async def ensure_network_cleanup( delete_result = await network.delete() assert delete_result is True except aiodocker.exceptions.DockerError as e: - # if the tests succeeds the network will nto exists + # if the tests succeeds the network will not exists str_error = str(e) assert "network" in str_error assert "not found" in str_error -async def patch_dynamic_service_url(app: FastAPI, node_uuid: str) -> None: +async def patch_dynamic_service_url(app: FastAPI, node_uuid: str) -> str: """ Normally director-v2 talks via docker-netwoks with the dynamic-sidecar. 
async def _get_proxy_port(node_uuid: str) -> PositiveInt:
    """Return the locally exposed (published) port of the service's proxy.

    Normally director-v2 talks via docker-networks with the started proxy.
    Since the director-v2 was started outside docker and is not running in
    a container, the service port needs to be exposed and the url needs to
    be changed to 172.17.0.1 (docker localhost).

    returns: the published host port of the proxy service
    """
    service_name = f"{DYNAMIC_PROXY_SERVICE_PREFIX}_{node_uuid}"
    port = None

    async with aiodocker.Docker() as docker_client:
        async with timeout(SERVICE_WAS_CREATED_BY_DIRECTOR_V2):
            # it takes a bit of time for the port to be auto generated
            # keep trying until it is there
            while port is None:
                services = await docker_client.services.list()
                for service in services:
                    if service["Spec"]["Name"] == service_name:
                        ports = service["Endpoint"].get("Ports", [])
                        if len(ports) == 1:
                            port = ports[0]["PublishedPort"]
                        break

                if port is None:
                    # only wait while the port is still missing
                    # (previously also slept once after finding it)
                    await asyncio.sleep(1)

    assert port is not None
    return port


async def handle_307_if_required(
    director_v2_client: httpx.AsyncClient, director_v0_url: URL, result: httpx.Response
) -> Union[httpx.Response, requests.Response]:
    """Follow a 307 redirect from director-v2 towards director-v0, if issued.

    The redirect target still points at the in-cluster address
    ``http://director:8080``; it is rewritten to the locally reachable
    `director_v0_url` before the redirect is followed.
    """

    def _debug_print(
        result: Union[httpx.Response, requests.Response], heading_text: str
    ) -> None:
        print(
            (
                f"{heading_text}\n>>>\n{result.request.method}\n"
                f"{result.request.url}\n{result.request.headers}\n"
                f"<<<\n{result.status_code}\n{result.headers}\n{result.text}\n"
            )
        )

    if result.next_request is not None:
        _debug_print(result, "REDIRECTING[1/2] DV2")

        # replace url endpoint for director-v0 in redirect
        result.next_request.url = httpx.URL(
            str(result.next_request.url).replace(
                "http://director:8080", str(director_v0_url)
            )
        )

        # when both director-v0 and director-v2 were running in containers
        # it was possible to use httpx for GET requests as well;
        # since director-v2 is now started on the host directly,
        # a 405 Method Not Allowed is returned.
        # using requests is a workaround for the issue
        if result.request.method == "GET":
            redirect_result = requests.get(str(result.next_request.url))
        else:
            redirect_result = await director_v2_client.send(result.next_request)

        _debug_print(redirect_result, "REDIRECTING[2/2] DV0")

        return redirect_result

    return result


async def assert_start_service(
    director_v2_client: httpx.AsyncClient,
    director_v0_url: URL,
    user_id: int,
    project_id: str,
    service_key: str,
    service_version: str,
    service_uuid: str,
    basepath: Optional[str],
) -> None:
    """Start a dynamic service via director-v2 and assert it was created (201)."""
    data = dict(
        user_id=user_id,
        project_id=project_id,
        service_key=service_key,
        service_version=service_version,
        service_uuid=service_uuid,
        basepath=basepath,
    )
    headers = {
        "x-dynamic-sidecar-request-dns": director_v2_client.base_url.host,
        "x-dynamic-sidecar-request-scheme": director_v2_client.base_url.scheme,
    }

    result = await director_v2_client.post(
        "/dynamic_services", json=data, headers=headers, allow_redirects=False
    )
    result = await handle_307_if_required(director_v2_client, director_v0_url, result)
    assert result.status_code == 201, result.text


async def get_service_data(
    director_v2_client: httpx.AsyncClient,
    director_v0_url: URL,
    service_uuid: str,
    node_data: Node,
) -> Dict[str, Any]:
    """Fetch the running-service details for `service_uuid`.

    Legacy (director-v0) replies are wrapped in a ``{"data": ...}`` envelope,
    which is unwrapped here so callers always get the bare payload.
    """
    result = await director_v2_client.get(
        f"/dynamic_services/{service_uuid}", allow_redirects=False
    )
    result = await handle_307_if_required(director_v2_client, director_v0_url, result)
    assert result.status_code == 200, result.text

    payload = result.json()
    data = payload["data"] if is_legacy(node_data) else payload
    return data


async def _get_service_state(
    director_v2_client: httpx.AsyncClient,
    director_v0_url: URL,
    service_uuid: str,
    node_data: Node,
) -> str:
    """Return the current `service_state` string reported for the service."""
    data = await get_service_data(
        director_v2_client, director_v0_url, service_uuid, node_data
    )
    print("STATUS_RESULT", node_data.label, data["service_state"])
    return data["service_state"]


async def assert_all_services_running(
    director_v2_client: httpx.AsyncClient,
    director_v0_url: URL,
    workbench: Dict[str, Node],
) -> None:
    """Poll every workbench service until all report state ``running``.

    Fails immediately if any service reports ``failed``; overall wait is
    bounded by SERVICES_ARE_READY_TIMEOUT.
    """
    async with timeout(SERVICES_ARE_READY_TIMEOUT):
        not_all_services_running = True

        while not_all_services_running:
            service_states = await asyncio.gather(
                *(
                    _get_service_state(
                        director_v2_client=director_v2_client,
                        director_v0_url=director_v0_url,
                        service_uuid=dynamic_service_uuid,
                        node_data=node_data,
                    )
                    for dynamic_service_uuid, node_data in workbench.items()
                )
            )

            # check that no service has failed
            for service_state in service_states:
                assert service_state != "failed"

            are_services_running = [x == "running" for x in service_states]
            not_all_services_running = not all(are_services_running)
            # let the services boot
            await asyncio.sleep(1.0)
allow_redirects=False, + ) + result = await handle_307_if_required(director_v2_client, director_v0_url, result) + + assert result.status_code == 200, result.text + json_result = result.json() + print(f"{service_uuid}:retrieve result ", json_result) + + size_bytes = json_result["data"]["size_bytes"] + assert size_bytes > 0 + assert type(size_bytes) == int + + +async def assert_stop_service( + director_v2_client: httpx.AsyncClient, director_v0_url: URL, service_uuid: str +) -> None: + result = await director_v2_client.delete( + f"/dynamic_services/{service_uuid}", allow_redirects=False + ) + result = await handle_307_if_required(director_v2_client, director_v0_url, result) + assert result.status_code == 204 + assert result.text == "" + + +async def _inspect_service_and_print_logs( + tag: str, service_name: str, is_legacy: bool +) -> None: + """inspects proxy and prints logs from it""" + if is_legacy: + print(f"Skipping service logs and inspect for {service_name}") + return + + target_service = service_name.replace( + DYNAMIC_SIDECAR_SERVICE_PREFIX, DYNAMIC_PROXY_SERVICE_PREFIX + ) + + async with aiodocker.Docker() as docker_client: + service_details = await docker_client.services.inspect(target_service) + + print(f"{SEPARATOR} - {tag}\nService inspect: {target_service}") + + formatted_inspect = json.dumps(service_details, indent=2) + print(f"{formatted_inspect}\n{SEPARATOR}") + + # print containers inspect to see them all + for container in await docker_client.containers.list(): + container_inspect = await container.show() + formatted_container_inspect = json.dumps(container_inspect, indent=2) + container_name = container_inspect["Name"][1:] + print(f"Container inspect: {container_name}") + print(f"{formatted_container_inspect}\n{SEPARATOR}") + + logs = await docker_client.services.logs( + service_details["ID"], stdout=True, stderr=True + ) + formatted_logs = "".join(logs) + print(f"{formatted_logs}\n{SEPARATOR} - {tag}") + + +def _run_command(command: str) -> str: + 
# using asyncio.create_subprocess_shell is slower + # and sometimes ir randomly hangs forever + + print(f"Running: '{command}'") + command_result = os.popen(command).read() + print(command_result) + return command_result + + +async def _port_forward_legacy_service( # pylint: disable=redefined-outer-name + service_name: str, internal_port: PositiveInt +) -> PositiveInt: + """Updates the service configuration and makes it so it can be used""" + # By updating the service spec the container will be recreated. + # It works in this case, since we do not care about the internal + # state of the application + + # Legacy services are started --endpoint-mode dnsrr, it needs to + # be changed to vip otherwise the port forward will not work + result = _run_command(f"docker service update {service_name} --endpoint-mode=vip") + assert "verify: Service converged" in result + + # Finally forward the port on a random assigned port. + result = _run_command( + f"docker service update {service_name} --publish-add :{internal_port}" + ) + assert "verify: Service converged" in result + + # inspect service and fetch the port + async with aiodocker.Docker() as docker_client: + service_details = await docker_client.services.inspect(service_name) + ports = service_details["Endpoint"]["Ports"] + + assert len(ports) == 1, service_details + exposed_port = ports[0]["PublishedPort"] + return exposed_port + + +async def assert_service_is_available( # pylint: disable=redefined-outer-name + exposed_port: PositiveInt, is_legacy: bool, service_uuid: str +) -> None: + service_address = ( + f"http://172.17.0.1:{exposed_port}/x/{service_uuid}" + if is_legacy + else f"http://172.17.0.1:{exposed_port}" + ) + print(f"checking service @ {service_address}") + + async for attempt in AsyncRetrying( + wait=wait_fixed(1), stop=stop_after_attempt(60), reraise=True + ): + with attempt: + async with httpx.AsyncClient() as client: + response = await client.get(service_address) + 
print(f"{SEPARATOR}\nAttempt={attempt.retry_state.attempt_number}") + print( + f"Body:\n{response.text}\nHeaders={response.headers}\n{SEPARATOR}" + ) + assert response.status_code == 200, response.text + + +async def assert_services_reply_200( + director_v2_client: httpx.AsyncClient, + director_v0_url: URL, + workbench: Dict[str, Node], +) -> None: + for service_uuid, node_data in workbench.items(): + service_data = await get_service_data( + director_v2_client=director_v2_client, + director_v0_url=director_v0_url, + service_uuid=service_uuid, + node_data=node_data, + ) + print( + "Checking running service availability", + service_uuid, + node_data, + service_data, + ) + + await _inspect_service_and_print_logs( + tag=f"before_port_forward {service_uuid}", + service_name=service_data["service_host"], + is_legacy=is_legacy(node_data), + ) + exposed_port = ( + await _port_forward_legacy_service( + service_name=service_data["service_host"], + internal_port=service_data["service_port"], + ) + if is_legacy(node_data) + else await _get_proxy_port(node_uuid=service_uuid) + ) + await _inspect_service_and_print_logs( + tag=f"after_port_forward {service_uuid}", + service_name=service_data["service_host"], + is_legacy=is_legacy(node_data), + ) + + try: + await assert_service_is_available( + exposed_port=exposed_port, + is_legacy=is_legacy(node_data), + service_uuid=service_uuid, + ) + finally: + await _inspect_service_and_print_logs( + tag=f"after_service_is_available {service_uuid}", + service_name=service_data["service_host"], + is_legacy=is_legacy(node_data), + ) diff --git a/services/director-v2/tests/integration/conftest.py b/services/director-v2/tests/integration/conftest.py new file mode 100644 index 00000000000..2ffc3318b0b --- /dev/null +++ b/services/director-v2/tests/integration/conftest.py @@ -0,0 +1,34 @@ +from typing import Callable + +import pytest +import sqlalchemy as sa +from simcore_postgres_database.models.comp_tasks import comp_tasks +from 
@pytest.fixture
def update_project_workbench_with_comp_tasks(postgres_db: sa.engine.Engine) -> Callable:
    """Yields a callable that copies each comp_task's outputs and run_hash
    back into the matching node of the project's workbench in the database."""

    def updator(project_uuid: str):
        with postgres_db.connect() as con:
            result = con.execute(
                projects.select().where(projects.c.uuid == project_uuid)
            )
            prj_row = result.first()
            prj_workbench = prj_row.workbench

            result = con.execute(
                comp_tasks.select().where(comp_tasks.c.project_id == project_uuid)
            )
            # let's get the results and run_hash and
            # pass these to the project workbench
            for task_row in result:
                prj_workbench[task_row.node_id]["outputs"] = task_row.outputs
                prj_workbench[task_row.node_id]["runHash"] = task_row.run_hash

            con.execute(
                projects.update()  # pylint:disable=no-value-for-parameter
                .values(workbench=prj_workbench)
                .where(projects.c.uuid == project_uuid)
            )

    yield updator


COMPUTATION_URL: str = "v2/computations"


def create_pipeline(
    client: TestClient,
    *,
    project: ProjectAtDB,
    user_id: PositiveInt,
    start_pipeline: bool,
    expected_response_status_code: int,
    **kwargs,
) -> Response:
    """POST a new computation for `project` and assert the expected status code.

    Extra keyword arguments are forwarded verbatim into the request body.
    """
    response = client.post(
        COMPUTATION_URL,
        json={
            "user_id": user_id,
            "project_id": str(project.uuid),
            "start_pipeline": start_pipeline,
            **kwargs,
        },
    )
    assert (
        response.status_code == expected_response_status_code
    ), f"response code is {response.status_code}, error: {response.text}"
    return response


def assert_computation_task_out_obj(
    client: TestClient,
    task_out: ComputationTaskOut,
    *,
    project: ProjectAtDB,
    exp_task_state: RunningState,
    exp_pipeline_details: PipelineDetails,
):
    """Check a ComputationTaskOut against the expected state, urls and details."""
    assert task_out.id == project.uuid
    assert task_out.state == exp_task_state
    assert task_out.url == f"{client.base_url}/v2/computations/{project.uuid}"
    # the stop url only exists while the pipeline can still be stopped
    assert task_out.stop_url == (
        f"{client.base_url}/v2/computations/{project.uuid}:stop"
        if exp_task_state in [RunningState.PUBLISHED, RunningState.PENDING]
        else None
    )
    # check pipeline details contents
    assert task_out.pipeline_details == exp_pipeline_details


def assert_pipeline_status(
    client: TestClient,
    url: AnyHttpUrl,
    user_id: PositiveInt,
    project_uuid: UUID,
    wait_for_states: Optional[List[RunningState]] = None,
) -> ComputationTaskOut:
    """Poll `url` until the pipeline reaches one of `wait_for_states`.

    Defaults to the terminal states (SUCCESS / FAILED / ABORTED). Retries on
    AssertionError with random 0-2s backoff for up to MAX_TIMEOUT_S seconds,
    then re-raises the last failure.
    """
    if not wait_for_states:
        wait_for_states = [
            RunningState.SUCCESS,
            RunningState.FAILED,
            RunningState.ABORTED,
        ]

    MAX_TIMEOUT_S = 60

    @retry(
        stop=stop_after_delay(MAX_TIMEOUT_S),
        wait=wait_random(0, 2),
        retry=retry_if_exception_type(AssertionError),
        reraise=True,
    )
    def check_pipeline_state() -> ComputationTaskOut:
        response = client.get(url, params={"user_id": user_id})
        assert (
            response.status_code == status.HTTP_202_ACCEPTED
        ), f"response code is {response.status_code}, error: {response.text}"
        task_out = ComputationTaskOut.parse_obj(response.json())
        assert task_out.id == project_uuid
        assert task_out.url == f"{client.base_url}/v2/computations/{project_uuid}"
        print("Pipeline is in ", task_out.state)
        assert task_out.state in wait_for_states
        return task_out

    task_out = check_pipeline_state()

    return task_out
# pylint: disable=redefined-outer-name

import os
from pathlib import Path
from typing import Any, Callable, Dict, List
from uuid import uuid4

import pytest
from simcore_service_director_v2.models.schemas.constants import (
    DYNAMIC_SIDECAR_SERVICE_PREFIX,
)
from simcore_service_director_v2.modules.dynamic_sidecar.volumes_resolver import (
    DynamicSidecarVolumesPathsResolver,
)


# FIXTURES
@pytest.fixture(scope="module")
def compose_namespace() -> str:
    """Unique docker-compose namespace for this test module."""
    return f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}_{uuid4()}"


@pytest.fixture(scope="module")
def node_uuid() -> str:
    """Random node uuid, as a string."""
    return f"{uuid4()}"


@pytest.fixture(scope="module")
def state_paths() -> List[Path]:
    """Ten nested state paths used to exercise the path flattening."""
    return [Path(f"/tmp/asd/asd/{index}") for index in range(10)]


@pytest.fixture
def expect(node_uuid: str) -> Callable[[str, str], Dict[str, Any]]:
    """Builds the docker mount entry expected for a (source, target) pair."""

    def _expected_mount(source: str, target: str) -> Dict[str, Any]:
        return {
            "Source": source,
            "Target": target,
            "Type": "volume",
            "VolumeOptions": {"Labels": {"uuid": node_uuid}},
        }

    return _expected_mount


# TESTS


def test_expected_paths(
    compose_namespace: str,
    node_uuid: str,
    state_paths: List[Path],
    expect: Callable[[str, str], Dict[str, Any]],
) -> None:
    """Mount entries map /inputs, /outputs and state paths under /dy-volumes."""
    # the two fixed folders keep their name as both volume suffix and target
    for folder_name in ("inputs", "outputs"):
        mount_entry = DynamicSidecarVolumesPathsResolver.mount_entry(
            compose_namespace, Path(f"/{folder_name}"), node_uuid
        )
        assert mount_entry == expect(
            source=f"{compose_namespace}_{folder_name}",
            target=f"/dy-volumes/{folder_name}",
        )

    # state paths are flattened: every os separator becomes "_"
    for state_path in state_paths:
        flattened = str(state_path).replace(os.sep, "_")
        mount_entry = DynamicSidecarVolumesPathsResolver.mount_entry(
            compose_namespace, state_path, node_uuid
        )
        assert mount_entry == expect(
            source=f"{compose_namespace}{flattened}",
            target=f"/dy-volumes/{flattened.strip('_')}",
        )
simcore_service_director_v2.core.settings import DynamicSidecarSettings +from simcore_service_director_v2.core.settings import ( + AppSettings, + DynamicSidecarSettings, +) from simcore_service_director_v2.models.schemas.dynamic_services import SchedulerData from simcore_service_director_v2.modules.dynamic_sidecar.docker_service_specs import ( get_dynamic_sidecar_spec, @@ -24,6 +27,10 @@ def mocked_env(monkeypatch: MonkeyPatch) -> Iterator[Dict[str, str]]: env_vars: Dict[str, str] = { "DYNAMIC_SIDECAR_IMAGE": "local/dynamic-sidecar:MOCK", + "POSTGRES_HOST": "test_host", + "POSTGRES_USER": "test_user", + "POSTGRES_PASSWORD": "test_password", + "POSTGRES_DB": "test_db", } with monkeypatch.context() as m: @@ -79,6 +86,7 @@ async def test_get_dynamic_proxy_spec( dynamic_sidecar_network_id=dynamic_sidecar_network_id, swarm_network_id=swarm_network_id, settings=cast(SimcoreServiceSettingsLabel, simcore_service_labels.settings), + app_settings=AppSettings.create_from_envs(), ) assert dynamic_sidecar_spec pprint(dynamic_sidecar_spec) diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py index 245c1476294..8f1b2142f4b 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py +++ b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py @@ -3,12 +3,14 @@ import asyncio +import json import logging import re +import urllib.parse from asyncio import BaseEventLoop from contextlib import asynccontextmanager, contextmanager from importlib import reload -from typing import AsyncGenerator, Callable, Iterator, List, Type, Union +from typing import Any, AsyncGenerator, Callable, Dict, Iterator, List, Type, Union from unittest.mock import AsyncMock import httpx @@ -17,6 +19,7 @@ from _pytest.monkeypatch import MonkeyPatch from fastapi import FastAPI from fastapi.testclient import 
TestClient +from models_library.service_settings_labels import SimcoreServiceLabels from pytest_mock.plugin import MockerFixture from respx.router import MockRouter from simcore_service_director_v2.core.settings import AppSettings @@ -26,6 +29,7 @@ SchedulerData, ServiceState, ) +from simcore_service_director_v2.modules.director_v0 import DirectorV0Client from simcore_service_director_v2.modules.dynamic_sidecar import module_setup from simcore_service_director_v2.modules.dynamic_sidecar.client_api import ( get_url, @@ -87,6 +91,16 @@ def _mock_containers_docker_status( yield mock +async def _assert_remove_service( + scheduler: DynamicSidecarsScheduler, scheduler_data: SchedulerData +) -> None: + # pylint: disable=protected-access + await scheduler.mark_service_for_removal(scheduler_data.node_uuid, True) + assert scheduler_data.service_name in scheduler._to_observe + await scheduler.finish_service_removal(scheduler_data.node_uuid) + assert scheduler_data.service_name not in scheduler._to_observe + + @asynccontextmanager async def _assert_get_dynamic_services_mocked( scheduler: DynamicSidecarsScheduler, @@ -102,12 +116,38 @@ async def _assert_get_dynamic_services_mocked( yield stack_status - await scheduler.remove_service(scheduler_data.node_uuid, True) + await _assert_remove_service(scheduler, scheduler_data) + + +def _assemble_labels(scheduler_data: SchedulerData) -> Dict[str, Any]: + return { + "simcore.service.container-http-entrypoint": scheduler_data.container_http_entry, + "simcore.service.paths-mapping": json.dumps(scheduler_data.paths_mapping), + "simcore.service.compose-spec": json.dumps(scheduler_data.compose_spec), + } # FIXTURES +@pytest.fixture +def mocked_director_v0( + dynamic_sidecar_settings: AppSettings, scheduler_data: SchedulerData +) -> MockRouter: + endpoint = dynamic_sidecar_settings.DIRECTOR_V0.endpoint + + with respx.mock as mock: + mock.get( + re.compile( + 
fr"^{endpoint}/services/{urllib.parse.quote_plus(scheduler_data.key)}/{scheduler_data.version}/labels" + ), + name="service labels", + ).respond( + json={"data": SimcoreServiceLabels.Config.schema_extra["examples"][0]} + ) + yield mock + + @pytest.fixture def mocked_dynamic_scheduler_events() -> None: class AlwaysTriggersDynamicSchedulerEvent(DynamicSchedulerEvent): @@ -165,11 +205,22 @@ def dynamic_sidecar_settings(monkeypatch: MonkeyPatch) -> AppSettings: @pytest.fixture async def mocked_app( - loop: BaseEventLoop, dynamic_sidecar_settings: AppSettings, docker_swarm: None + loop: BaseEventLoop, + dynamic_sidecar_settings: AppSettings, + mocked_director_v0: MockRouter, + docker_swarm: None, ) -> Iterator[FastAPI]: app = FastAPI() app.state.settings = dynamic_sidecar_settings + log.info("AppSettings=%s", dynamic_sidecar_settings) try: + DirectorV0Client.create( + app, + client=httpx.AsyncClient( + base_url=f"{dynamic_sidecar_settings.DIRECTOR_V0.endpoint}", + timeout=dynamic_sidecar_settings.CLIENT_REQUEST.HTTP_CLIENT_REQUEST_TOTAL_TIMEOUT, + ), + ) await setup_api_client(app) await setup_scheduler(app) @@ -240,7 +291,7 @@ async def test_scheduler_add_remove( await ensure_scheduler_runs_once() assert scheduler_data.dynamic_sidecar.is_available is True - await scheduler.remove_service(scheduler_data.node_uuid, True) + await _assert_remove_service(scheduler, scheduler_data) async def test_scheduler_removes_partially_started_services( @@ -284,7 +335,7 @@ async def test_scheduler_health_timing_out( await scheduler.add_service(scheduler_data) await ensure_scheduler_runs_once() - assert scheduler_data.dynamic_sidecar.is_available == False + assert scheduler_data.dynamic_sidecar.is_available is False async def test_adding_service_two_times( @@ -329,7 +380,7 @@ async def test_remove_missing_no_error( mocked_dynamic_scheduler_events: None, ) -> None: with pytest.raises(DynamicSidecarNotFoundError) as execinfo: - await scheduler.remove_service(scheduler_data.node_uuid, True) 
+ await scheduler.mark_service_for_removal(scheduler_data.node_uuid, True) assert f"node {scheduler_data.node_uuid} not found" == str(execinfo.value) diff --git a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py index 840395b7198..9c8a958b941 100644 --- a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py +++ b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py @@ -2,25 +2,34 @@ # pylint:disable=unused-argument # pylint:disable=redefined-outer-name - import json +import logging +import os import urllib +from argparse import Namespace from collections import namedtuple -from typing import Any, Dict, Optional +from typing import Any, Dict, Iterator, Optional +from uuid import UUID import pytest import respx +from _pytest.monkeypatch import MonkeyPatch from fastapi import FastAPI from httpx import URL, QueryParams from models_library.projects_nodes_io import NodeID from models_library.service_settings_labels import SimcoreServiceLabels from pytest_mock.plugin import MockerFixture +from respx import MockRouter from simcore_service_director_v2.models.domains.dynamic_services import ( DynamicServiceCreate, + RetrieveDataOutEnveloped, ) from simcore_service_director_v2.models.schemas.dynamic_services import ( RunningDynamicServiceDetails, ) +from simcore_service_director_v2.models.schemas.dynamic_services.scheduler import ( + SchedulerData, +) from simcore_service_director_v2.modules.dynamic_sidecar.client_api import ( setup_api_client, ) @@ -33,11 +42,15 @@ from starlette import status from starlette.testclient import TestClient -ServiceParams = namedtuple("ServiceParams", "service, service_labels, exp_status_code") +ServiceParams = namedtuple( + "ServiceParams", "service, service_labels, exp_status_code, is_legacy" +) + +logger = logging.getLogger(__name__) @pytest.fixture -def 
minimal_config(project_env_devel_environment, monkeypatch) -> None: +def minimal_config(project_env_devel_environment, monkeypatch: MonkeyPatch) -> None: """set a minimal configuration for testing the director connection only""" monkeypatch.setenv("SC_BOOT_MODE", "default") monkeypatch.setenv("DIRECTOR_ENABLED", "1") @@ -57,10 +70,93 @@ def dynamic_sidecar_headers() -> Dict[str, str]: } +@pytest.fixture(scope="function") +def mock_env(monkeypatch: MonkeyPatch, docker_swarm: None) -> None: + # Works as below line in docker.compose.yml + # ${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest} + + registry = os.environ.get("DOCKER_REGISTRY", "local") + image_tag = os.environ.get("DOCKER_IMAGE_TAG", "production") + + image_name = f"{registry}/dynamic-sidecar:{image_tag}" + + logger.warning("Patching to: DYNAMIC_SIDECAR_IMAGE=%s", image_name) + monkeypatch.setenv("DYNAMIC_SIDECAR_IMAGE", image_name) + + monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", "test_network_name") + monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") + monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") + monkeypatch.setenv("DIRECTOR_V2_CELERY_SCHEDULER_ENABLED", "false") + monkeypatch.setenv("DIRECTOR_V2_DASK_CLIENT_ENABLED", "false") + monkeypatch.setenv("DIRECTOR_V2_DASK_SCHEDULER_ENABLED", "false") + monkeypatch.setenv("DIRECTOR_V2_DYNAMIC_SCHEDULER_ENABLED", "true") + + monkeypatch.setenv("POSTGRES_HOST", "mocked_host") + monkeypatch.setenv("POSTGRES_USER", "mocked_user") + monkeypatch.setenv("POSTGRES_PASSWORD", "mocked_password") + monkeypatch.setenv("POSTGRES_DB", "mocked_db") + monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "false") + + monkeypatch.setenv("SC_BOOT_MODE", "production") + + +@pytest.fixture(scope="function") +def minimal_app(client: TestClient) -> FastAPI: + # disbale shutdown events, not required for these tests + client.app.router.on_shutdown = [] + return client.app + + +@pytest.fixture +async def 
mock_retrieve_features( + minimal_app: FastAPI, + service: Dict[str, Any], + is_legacy: bool, + scheduler_data_from_http_request: SchedulerData, +) -> Iterator[Optional[MockRouter]]: + with respx.mock( + assert_all_called=False, + assert_all_mocked=True, + ) as respx_mock: + if is_legacy: + service_details = RunningDynamicServiceDetails.parse_obj( + RunningDynamicServiceDetails.Config.schema_extra["examples"][0] + ) + respx_mock.post( + f"{service_details.legacy_service_url}/retrieve", name="retrieve" + ).respond(json=RetrieveDataOutEnveloped.Config.schema_extra["examples"][0]) + + yield respx_mock + # no cleanup required + else: + await setup_scheduler(minimal_app) + await setup_api_client(minimal_app) + + dynamic_sidecar_scheduler = minimal_app.state.dynamic_sidecar_scheduler + node_uuid = UUID(service["node_uuid"]) + serice_name = "serice_name" + + # pylint: disable=protected-access + dynamic_sidecar_scheduler._inverse_search_mapping[node_uuid] = serice_name + dynamic_sidecar_scheduler._to_observe[serice_name] = Namespace( + scheduler_data=scheduler_data_from_http_request + ) + + respx_mock.post( + f"{scheduler_data_from_http_request.dynamic_sidecar.endpoint}/v1/containers/ports/inputs:pull", + name="service_pull_input_ports", + ).respond(json=42) + + yield respx_mock + + dynamic_sidecar_scheduler._inverse_search_mapping.pop(node_uuid) + dynamic_sidecar_scheduler._to_observe.pop(serice_name) + + @pytest.fixture def mocked_director_v0_service_api( minimal_app: FastAPI, service: Dict[str, Any], service_labels: Dict[str, Any] -): +) -> MockRouter: with respx.mock( base_url=minimal_app.state.settings.DIRECTOR_V0.endpoint, assert_all_called=False, @@ -75,7 +171,11 @@ def mocked_director_v0_service_api( respx_mock.get( f"/running_interactive_services/{service['node_uuid']}", name="running interactive service", - ).respond(json={"data": {}}) + ).respond( + json={ + "data": RunningDynamicServiceDetails.Config.schema_extra["examples"][0] + } + ) yield respx_mock @@ 
-99,24 +199,25 @@ def get_stack_status(node_uuid: NodeID) -> RunningDynamicServiceDetails: ) # MOCKING remove_service - def remove_service(node_uuid: NodeID, save_state: Optional[bool]) -> None: + def remove_service(node_uuid: NodeID, can_save: Optional[bool]) -> None: if exp_status_code == status.HTTP_307_TEMPORARY_REDIRECT: raise DynamicSidecarNotFoundError(node_uuid) mocker.patch( - "simcore_service_director_v2.modules.dynamic_sidecar.scheduler.task.DynamicSidecarsScheduler.remove_service", + "simcore_service_director_v2.modules.dynamic_sidecar.scheduler.task.DynamicSidecarsScheduler.mark_service_for_removal", side_effect=remove_service, ) @pytest.mark.parametrize( - "service, service_labels, exp_status_code", + "service, service_labels, exp_status_code, is_legacy", [ pytest.param( *ServiceParams( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][0], exp_status_code=status.HTTP_307_TEMPORARY_REDIRECT, + is_legacy=True, ), id="LEGACY", ), @@ -125,6 +226,7 @@ def remove_service(node_uuid: NodeID, save_state: Optional[bool]) -> None: service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][1], exp_status_code=status.HTTP_201_CREATED, + is_legacy=False, ), id="DYNAMIC", ), @@ -133,6 +235,7 @@ def remove_service(node_uuid: NodeID, save_state: Optional[bool]) -> None: service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][2], exp_status_code=status.HTTP_201_CREATED, + is_legacy=False, ), id="DYNAMIC_COMPOSE", ), @@ -140,18 +243,15 @@ def remove_service(node_uuid: NodeID, save_state: Optional[bool]) -> None: ) async def test_create_dynamic_services( minimal_config: None, - mocked_director_v0_service_api, + mocked_director_v0_service_api: MockRouter, docker_swarm: None, mocked_director_v2_scheduler: None, client: TestClient, dynamic_sidecar_headers: 
Dict[str, str], service: Dict[str, Any], exp_status_code: int, + is_legacy: bool, ): - # dynamic-sidecar components - await setup_scheduler(client.app) - await setup_api_client(client.app) - post_data = DynamicServiceCreate(**service) response = client.post( @@ -182,13 +282,14 @@ async def test_create_dynamic_services( @pytest.mark.parametrize( - "service, service_labels, exp_status_code", + "service, service_labels, exp_status_code, is_legacy", [ pytest.param( *ServiceParams( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][0], exp_status_code=status.HTTP_307_TEMPORARY_REDIRECT, + is_legacy=True, ), id="LEGACY", ), @@ -197,6 +298,7 @@ async def test_create_dynamic_services( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][1], exp_status_code=status.HTTP_200_OK, + is_legacy=False, ), id="DYNAMIC", ), @@ -205,21 +307,20 @@ async def test_create_dynamic_services( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][2], exp_status_code=status.HTTP_200_OK, + is_legacy=False, ), id="DYNAMIC_COMPOSE", ), ], ) async def test_get_service_status( - mocked_director_v0_service_api, + mocked_director_v0_service_api: MockRouter, mocked_director_v2_scheduler: None, client: TestClient, service: Dict[str, Any], exp_status_code: int, + is_legacy: bool, ): - # dynamic-sidecar components - await setup_scheduler(client.app) - url = URL(f"/v2/dynamic_services/{service['node_uuid']}") response = client.get(str(url), allow_redirects=False) @@ -239,13 +340,14 @@ async def test_get_service_status( @pytest.mark.parametrize( - "service, service_labels, exp_status_code", + "service, service_labels, exp_status_code, is_legacy", [ pytest.param( *ServiceParams( service=DynamicServiceCreate.Config.schema_extra["example"], 
service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][0], exp_status_code=status.HTTP_307_TEMPORARY_REDIRECT, + is_legacy=True, ), id="LEGACY", ), @@ -254,6 +356,7 @@ async def test_get_service_status( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][1], exp_status_code=status.HTTP_204_NO_CONTENT, + is_legacy=False, ), id="DYNAMIC", ), @@ -262,29 +365,28 @@ async def test_get_service_status( service=DynamicServiceCreate.Config.schema_extra["example"], service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][2], exp_status_code=status.HTTP_204_NO_CONTENT, + is_legacy=False, ), id="DYNAMIC_COMPOSE", ), ], ) @pytest.mark.parametrize( - "save_state, exp_save_state", [(None, True), (True, True), (False, False)] + "can_save, exp_save_state", [(None, True), (True, True), (False, False)] ) async def test_delete_service( - mocked_director_v0_service_api, + mocked_director_v0_service_api: MockRouter, mocked_director_v2_scheduler: None, client: TestClient, service: Dict[str, Any], exp_status_code: int, - save_state: Optional[bool], + is_legacy: bool, + can_save: Optional[bool], exp_save_state: bool, ): - # dynamic-sidecar components - await setup_scheduler(client.app) - url = URL(f"/v2/dynamic_services/{service['node_uuid']}") - if save_state is not None: - url = url.copy_with(params={"save_state": save_state}) + if can_save is not None: + url = url.copy_with(params={"can_save": can_save}) response = client.delete(str(url), allow_redirects=False) assert ( @@ -299,4 +401,55 @@ async def test_delete_service( redirect_url.path == f"/v0/running_interactive_services/{service['node_uuid']}" ) - assert redirect_url.params == QueryParams(save_state=exp_save_state) + assert redirect_url.params == QueryParams(can_save=exp_save_state) + + +@pytest.mark.parametrize( + "service, service_labels, exp_status_code, is_legacy", + [ + pytest.param( + *ServiceParams( + 
service=DynamicServiceCreate.Config.schema_extra["example"], + service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][0], + exp_status_code=status.HTTP_200_OK, + is_legacy=True, + ), + id="LEGACY", + ), + pytest.param( + *ServiceParams( + service=DynamicServiceCreate.Config.schema_extra["example"], + service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][1], + exp_status_code=status.HTTP_200_OK, + is_legacy=False, + ), + id="DYNAMIC", + ), + pytest.param( + *ServiceParams( + service=DynamicServiceCreate.Config.schema_extra["example"], + service_labels=SimcoreServiceLabels.Config.schema_extra["examples"][2], + exp_status_code=status.HTTP_200_OK, + is_legacy=False, + ), + id="DYNAMIC_COMPOSE", + ), + ], +) +async def test_retrieve( + mock_retrieve_features: Optional[MockRouter], + mocked_director_v0_service_api: MockRouter, + mocked_director_v2_scheduler: None, + client: TestClient, + service: Dict[str, Any], + exp_status_code: int, + is_legacy: bool, +) -> None: + url = URL(f"/v2/dynamic_services/{service['node_uuid']}:retrieve") + response = client.post(str(url), json=dict(port_keys=[]), allow_redirects=False) + assert ( + response.status_code == exp_status_code + ), f"expected status code {exp_status_code}, received {response.status_code}: {response.text}" + assert ( + response.json() == RetrieveDataOutEnveloped.Config.schema_extra["examples"][0] + ) diff --git a/services/director/Dockerfile b/services/director/Dockerfile index fb7072d868b..d8407b02486 100644 --- a/services/director/Dockerfile +++ b/services/director/Dockerfile @@ -10,12 +10,12 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=sanderegg -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ - # verify that the binary works - gosu nobody true +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ + # verify that the binary works + gosu nobody true # 
simcore-user uid=8004(scu) gid=8004(scu) groups=8004(scu) ENV SC_USER_ID=8004 \ diff --git a/services/dynamic-sidecar/.env-devel b/services/dynamic-sidecar/.env-devel index 58400c62eea..64212477ea7 100644 --- a/services/dynamic-sidecar/.env-devel +++ b/services/dynamic-sidecar/.env-devel @@ -9,6 +9,12 @@ SC_BOOT_MODE=local-development # service specific required vars DYNAMIC_SIDECAR_compose_namespace=dev-namespace +DY_SIDECAR_PATH_INPUTS=/tmp +DY_SIDECAR_PATH_OUTPUTS=/tmp +DY_SIDECAR_STATE_PATHS='["/tmp"]' +DY_SIDECAR_USER_ID=1 +DY_SIDECAR_PROJECT_ID=4539cfa0-8366-4c77-bf42-790684c7f564 +DY_SIDECAR_NODE_ID=d286bc62-3b4d-416a-90a2-3aec949468c5 # DOCKER REGISTRY REGISTRY_auth=false diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile index 01c323efa90..c549443a8ee 100644 --- a/services/dynamic-sidecar/Dockerfile +++ b/services/dynamic-sidecar/Dockerfile @@ -10,35 +10,38 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer="Andrei Neagu " -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ - # verify that the binary works - gosu nobody true +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ + # verify that the binary works + gosu nobody true # simcore-user uid=8004(scu) gid=8004(scu) groups=8004(scu) ENV SC_USER_ID=8004 \ - SC_USER_NAME=scu \ - SC_BUILD_TARGET=base \ - SC_BOOT_MODE=default + SC_USER_NAME=scu \ + SC_BUILD_TARGET=base \ + SC_BOOT_MODE=default RUN adduser \ - --uid ${SC_USER_ID} \ - --disabled-password \ - --gecos "" \ - --shell /bin/sh \ - --home /home/${SC_USER_NAME} \ - ${SC_USER_NAME} + --uid ${SC_USER_ID} \ + --disabled-password \ + --gecos "" \ + --shell /bin/sh \ + --home /home/${SC_USER_NAME} \ + ${SC_USER_NAME} # Sets utf-8 encoding for Python et al ENV LANG=C.UTF-8 # Turns off writing .pyc files; superfluous on an ephemeral container. 
ENV PYTHONDONTWRITEBYTECODE=1 \ - VIRTUAL_ENV=/home/scu/.venv + VIRTUAL_ENV=/home/scu/.venv # Ensures that the python and pip executables used # in the image will be those from our virtualenv. ENV PATH="${VIRTUAL_ENV}/bin:$PATH" +# directory where dynamic-sidecar creates, stores and shares +# volumes between itself and the spawned containers +ENV DY_VOLUMES="/dy-volumes" # -------------------------- Build stage ------------------- @@ -51,12 +54,13 @@ FROM base as build ENV SC_BUILD_TARGET=build RUN apt-get update &&\ - apt-get install -y --no-install-recommends \ - build-essential + apt-get install -y --no-install-recommends \ + build-essential # NOTE: python virtualenv is used here such that installed # packages may be moved to production image easily by copying the venv RUN python -m venv ${VIRTUAL_ENV} +RUN mkdir -p ${DY_VOLUMES} RUN pip install --upgrade --no-cache-dir \ pip~=21.3 \ @@ -86,7 +90,7 @@ COPY --chown=scu:scu services/dynamic-sidecar /build/services/dynamic-sidecar WORKDIR /build/services/dynamic-sidecar RUN pip --no-cache-dir install -r requirements/prod.txt &&\ - pip --no-cache-dir list -v + pip --no-cache-dir list -v # --------------------------Production stage ------------------- @@ -99,7 +103,7 @@ RUN pip --no-cache-dir install -r requirements/prod.txt &&\ FROM base as production ENV SC_BUILD_TARGET=production \ - SC_BOOT_MODE=production + SC_BOOT_MODE=production ENV PYTHONOPTIMIZE=TRUE @@ -107,16 +111,18 @@ WORKDIR /home/scu # Starting from clean base image, copies pre-installed virtualenv from prod-only-deps COPY --chown=scu:scu --from=prod-only-deps ${VIRTUAL_ENV} ${VIRTUAL_ENV} +COPY --chown=scu:scu --from=prod-only-deps ${DY_VOLUMES} ${DY_VOLUMES} # Copies booting scripts COPY --chown=scu:scu services/dynamic-sidecar/docker services/dynamic-sidecar/docker RUN chmod +x services/dynamic-sidecar/docker/*.sh -HEALTHCHECK --interval=30s \ - --timeout=20s \ - --start-period=30s \ - --retries=3 \ - CMD ["python3", 
"services/dynamic-sidecar/docker/healthcheck.py", "http://localhost:8000/health"] +# more aggressive checking helps with detecting when the service is ready +HEALTHCHECK --interval=1s \ + --timeout=1s \ + --start-period=1s \ + --retries=120 \ + CMD ["python3", "services/dynamic-sidecar/docker/healthcheck.py", "http://localhost:8000/health"] EXPOSE 8000 @@ -139,6 +145,7 @@ ENV SC_BUILD_TARGET=development WORKDIR /devel RUN chown -R scu:scu ${VIRTUAL_ENV} +RUN chown -R scu:scu ${DY_VOLUMES} EXPOSE 8000 EXPOSE 3000 diff --git a/services/dynamic-sidecar/VERSION b/services/dynamic-sidecar/VERSION index 8acdd82b765..3eefcb9dd5b 100644 --- a/services/dynamic-sidecar/VERSION +++ b/services/dynamic-sidecar/VERSION @@ -1 +1 @@ -0.0.1 +1.0.0 diff --git a/services/dynamic-sidecar/docker/entrypoint.sh b/services/dynamic-sidecar/docker/entrypoint.sh index 057a7f3bbdc..507ac7ee359 100755 --- a/services/dynamic-sidecar/docker/entrypoint.sh +++ b/services/dynamic-sidecar/docker/entrypoint.sh @@ -88,8 +88,14 @@ if stat $DOCKER_MOUNT >/dev/null 2>&1; then adduser "$SC_USER_NAME" "$GROUPNAME" fi +# Change ownership of volumes mount directory +# directories are empty at this point +# each individual subdirectory is a unique volume +chown --verbose --recursive "$SC_USER_NAME":"$GROUPNAME" "${DY_VOLUMES}" + echo "$INFO Starting $* ..." 
echo " $SC_USER_NAME rights : $(id "$SC_USER_NAME")" echo " local dir : $(ls -al)" +echo " volumes dir : $(ls -al "${DY_VOLUMES}")" exec gosu "$SC_USER_NAME" "$@" diff --git a/services/dynamic-sidecar/openapi.json b/services/dynamic-sidecar/openapi.json index 334dfedf827..fa64600c1e9 100644 --- a/services/dynamic-sidecar/openapi.json +++ b/services/dynamic-sidecar/openapi.json @@ -305,49 +305,55 @@ } } }, - "/push": { + "/v1/containers/state:restore": { "post": { "tags": [ - "Mocked frontend calls" + "containers" ], - "summary": "Ignored Push Post", - "operationId": "ignored_push_post_push_post", + "summary": "Restores the state of the dynamic service", + "description": "When restoring the state:\n- pull inputs via nodeports\n- pull all the extra state paths", + "operationId": "restore_state_v1_containers_state_restore_post", "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } + "204": { + "description": "Successful Response" } } } }, - "/retrieve": { - "get": { + "/v1/containers/state:save": { + "post": { "tags": [ - "Mocked frontend calls" + "containers" ], - "summary": "Ignored Port Data Load", - "operationId": "ignored_port_data_load_retrieve_get", + "summary": "Stores the state of the dynamic service", + "operationId": "save_state_v1_containers_state_save_post", "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } + "204": { + "description": "Successful Response" } } - }, + } + }, + "/v1/containers/ports/inputs:pull": { "post": { "tags": [ - "Mocked frontend calls" + "containers" ], - "summary": "Ignored Port Data Save", - "operationId": "ignored_port_data_save_retrieve_post", + "summary": "Pull input ports data", + "operationId": "pull_input_ports_v1_containers_ports_inputs_pull_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Port Keys", + "type": "array", + "items": { 
+ "type": "string" + } + } + } + } + }, "responses": { "200": { "description": "Successful Response", @@ -356,40 +362,51 @@ "schema": {} } } - } - } - } - }, - "/state": { - "get": { - "tags": [ - "Mocked frontend calls" - ], - "summary": "Ignored Load Service State State", - "operationId": "ignored_load_service_state_state_state_get", - "responses": { - "200": { - "description": "Successful Response", + }, + "422": { + "description": "Validation Error", "content": { "application/json": { - "schema": {} + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } } } } } - }, + } + }, + "/v1/containers/ports/outputs:push": { "post": { "tags": [ - "Mocked frontend calls" + "containers" ], - "summary": "Ignored Save Service State State", - "operationId": "ignored_save_service_state_state_state_post", + "summary": "Push output ports data", + "operationId": "push_output_ports_v1_containers_ports_outputs_push_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Port Keys", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, "responses": { - "200": { - "description": "Successful Response", + "204": { + "description": "Successful Response" + }, + "422": { + "description": "Validation Error", "content": { "application/json": { - "schema": {} + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } } } } diff --git a/services/dynamic-sidecar/requirements/_base.in b/services/dynamic-sidecar/requirements/_base.in index 87d8924f365..6468b9441cb 100644 --- a/services/dynamic-sidecar/requirements/_base.in +++ b/services/dynamic-sidecar/requirements/_base.in @@ -11,6 +11,7 @@ # service-library[fastapi] --requirement ../../../packages/service-library/requirements/_base.in --requirement ../../../packages/service-library/requirements/_fastapi.in +--requirement ../../../packages/simcore-sdk/requirements/_base.in fastapi @@ -23,3 +24,4 @@ aiofiles PyYAML async-timeout async_generator +watchdog diff --git 
a/services/dynamic-sidecar/requirements/_base.txt b/services/dynamic-sidecar/requirements/_base.txt index b4226a1c432..0eb932bb420 100644 --- a/services/dynamic-sidecar/requirements/_base.txt +++ b/services/dynamic-sidecar/requirements/_base.txt @@ -8,10 +8,13 @@ aiodebug==1.1.2 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.19.1 # via -r requirements/_base.in aiofiles==0.7.0 - # via -r requirements/_base.in + # via + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in + # -r requirements/_base.in aiohttp==3.7.4.post0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -19,8 +22,15 @@ aiohttp==3.7.4.post0 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in # aiodocker +aiopg==1.3.1 + # via -r 
requirements/../../../packages/simcore-sdk/requirements/_base.in alembic==1.7.4 # via -r requirements/../../../packages/postgres-database/requirements/_base.in async-generator==1.10 @@ -29,10 +39,13 @@ async-timeout==3.0.1 # via # -r requirements/_base.in # aiohttp + # aiopg attrs==20.3.0 # via # -c requirements/../../../packages/service-library/requirements/././constraints.txt # -c requirements/../../../packages/service-library/requirements/./constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/./constraints.txt + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in # aiohttp # jsonschema bcrypt==3.2.0 @@ -50,6 +63,7 @@ chardet==4.0.0 # requests click==7.1.2 # via + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_migration.in # typer # uvicorn cryptography==3.4.7 @@ -59,6 +73,10 @@ cryptography==3.4.7 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # paramiko distro==1.5.0 @@ -66,7 +84,9 @@ distro==1.5.0 dnspython==2.1.0 # via email-validator docker==5.0.2 - # via 
docker-compose + # via + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_migration.in + # docker-compose docker-compose==1.29.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -74,6 +94,10 @@ docker-compose==1.29.1 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in dockerpty==0.4.1 @@ -95,8 +119,12 @@ idna==2.10 # via # -c requirements/../../../packages/service-library/requirements/././constraints.txt # -c requirements/../../../packages/service-library/requirements/./constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/./constraints.txt # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/postgres-database/requirements/_base.in + # -r 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in # email-validator # requests # yarl @@ -109,6 +137,10 @@ importlib-resources==5.3.0 ; python_version < "3.9" # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # alembic jaeger-client==4.8.0 @@ -121,16 +153,25 @@ markupsafe==2.0.1 # via mako multidict==5.1.0 # via + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.txt # aiohttp # yarl +networkx==2.6.3 + # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in opentracing==2.4.0 # via # fastapi-contrib # jaeger-client +packaging==21.0 + # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in paramiko==2.7.2 # via docker psycopg2-binary==2.9.1 - # via sqlalchemy + # via + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.txt + # -r 
requirements/../../../packages/simcore-sdk/requirements/_base.in + # aiopg + # sqlalchemy pycparser==2.20 # via cffi pydantic==1.8.2 @@ -141,18 +182,28 @@ pydantic==1.8.2 # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./_base.in # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in # -r requirements/_base.in # fastapi pyinstrument==4.0.3 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in pynacl==1.4.0 # via paramiko +pyparsing==2.4.7 + # via packaging 
pyrsistent==0.18.0 # via jsonschema python-dotenv==0.18.0 @@ -165,10 +216,16 @@ pyyaml==5.4.1 # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./_base.in # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in # docker-compose + # trafaret-config requests==2.25.1 # via # docker @@ -188,8 +245,15 @@ sqlalchemy==1.3.24 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/postgres-database/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in + # aiopg # alembic starlette==0.14.2 # via fastapi @@ -197,6 +261,9 @@ tenacity==8.0.1 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_migration.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in texttable==1.6.3 # via docker-compose threadloop==1.0.2 @@ -207,6 +274,12 @@ tornado==6.1 # via # jaeger-client # threadloop +tqdm==4.62.3 + # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in +trafaret==2.1.0 + # via trafaret-config +trafaret-config==2.0.2 + # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in typer==0.4.0 # via -r requirements/../../../packages/settings-library/requirements/_base.in typing-extensions==3.10.0.2 @@ -221,7 +294,12 @@ urllib3==1.26.6 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c 
requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_migration.in # requests uvicorn==0.13.4 # via @@ -230,17 +308,27 @@ uvicorn==0.13.4 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in +watchdog==2.1.5 + # via -r requirements/_base.in websocket-client==0.59.0 # via + # -r 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_migration.in # docker # docker-compose yarl==1.6.3 # via + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.txt # -r requirements/../../../packages/postgres-database/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in # aiohttp zipp==3.6.0 + # via importlib-resources # via # importlib-metadata # importlib-resources diff --git a/services/dynamic-sidecar/requirements/_test.txt b/services/dynamic-sidecar/requirements/_test.txt index f11b97c1cd0..e0526082a3a 100644 --- a/services/dynamic-sidecar/requirements/_test.txt +++ b/services/dynamic-sidecar/requirements/_test.txt @@ -33,13 +33,17 @@ multidict==5.1.0 # -c requirements/_base.txt # async-asgi-testclient packaging==21.0 - # via pytest + # via + # -c requirements/_base.txt + # pytest pluggy==1.0.0 # via pytest py==1.10.0 # via pytest pyparsing==2.4.7 - # via packaging + # via + # -c requirements/_base.txt + # packaging pytest==6.2.5 # via # -r requirements/_test.in diff --git a/services/dynamic-sidecar/requirements/ci.txt b/services/dynamic-sidecar/requirements/ci.txt index 7cc733816e1..d86c57e8d32 100644 --- a/services/dynamic-sidecar/requirements/ci.txt +++ b/services/dynamic-sidecar/requirements/ci.txt @@ -14,6 +14,7 @@ ../../packages/models-library/ ../../packages/postgres-database/ ../../packages/pytest-simcore/ +../../packages/simcore-sdk ../../packages/service-library[fastapi] ../../packages/settings-library/ diff --git a/services/dynamic-sidecar/requirements/dev.txt b/services/dynamic-sidecar/requirements/dev.txt index d1a421edfa8..2d1c00661ed 100644 --- a/services/dynamic-sidecar/requirements/dev.txt +++ b/services/dynamic-sidecar/requirements/dev.txt @@ -15,6 +15,7 @@ --editable ../../packages/models-library --editable 
../../packages/postgres-database/ --editable ../../packages/pytest-simcore/ +--editable ../../packages/simcore-sdk --editable ../../packages/service-library[fastapi] --editable ../../packages/settings-library diff --git a/services/dynamic-sidecar/requirements/prod.txt b/services/dynamic-sidecar/requirements/prod.txt index bd3efdd926e..a30978c96a0 100644 --- a/services/dynamic-sidecar/requirements/prod.txt +++ b/services/dynamic-sidecar/requirements/prod.txt @@ -12,6 +12,7 @@ # installs this repo's packages ../../packages/models-library/ ../../packages/postgres-database/ +../../packages/simcore-sdk ../../packages/service-library[fastapi] ../../packages/settings-library/ diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/_meta.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/_meta.py index ca360f4d9bc..70789954348 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/_meta.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/_meta.py @@ -1,6 +1,32 @@ +""" Package Metadata + +""" +from contextlib import suppress + import pkg_resources -current_distribution = pkg_resources.get_distribution("simcore_service_dynamic_sidecar") +_current_distribution = pkg_resources.get_distribution( + "simcore_service_dynamic_sidecar" +) + +PROJECT_NAME: str = _current_distribution.project_name + +API_VERSION: str = _current_distribution.version +MAJOR, MINOR, PATCH = _current_distribution.version.split(".") +API_VTAG: str = f"v{MAJOR}" + +__version__ = _current_distribution.version + + +def get_summary() -> str: + with suppress(Exception): + try: + metadata = _current_distribution.get_metadata_lines("METADATA") + except FileNotFoundError: + metadata = _current_distribution.get_metadata_lines("PKG-INFO") + + return next(x.split(":") for x in metadata if x.startswith("Summary:"))[-1] + return "" + -api_vtag = "v1" -__version__ = current_distribution.version +SUMMARY: str = get_summary() diff --git 
a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/_routing.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/_routing.py index 4b4829063f4..c158a4b2a92 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/_routing.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/_routing.py @@ -2,15 +2,13 @@ from fastapi import APIRouter -from .._meta import api_vtag +from .._meta import API_VTAG from .containers import containers_router from .health import health_router -from .mocked import mocked_router # setup and register all routes here form different modules main_router = APIRouter() main_router.include_router(health_router) -main_router.include_router(containers_router, prefix=f"/{api_vtag}") -main_router.include_router(mocked_router) +main_router.include_router(containers_router, prefix=f"/{API_VTAG}") __all__ = ["main_router"] diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py index d2e8566108c..ce9936365d6 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py @@ -3,7 +3,8 @@ import json import logging import traceback -from typing import Any, Dict, List, Union +from collections import deque +from typing import Any, Awaitable, Deque, Dict, List, Optional, Union from fastapi import ( APIRouter, @@ -12,9 +13,11 @@ HTTPException, Query, Request, + Response, status, ) from fastapi.responses import PlainTextResponse +from servicelib.utils import logged_gather from ..core.dependencies import get_application_health, get_settings, get_shared_store from ..core.settings import DynamicSidecarSettings @@ -27,6 +30,9 @@ ) from ..models.domains.shared_store import SharedStore from ..models.schemas.application_health import ApplicationHealth +from ..modules 
import nodeports +from ..modules.data_manager import pull_path_if_exists, upload_path_if_exists +from ..modules.mounted_fs import MountedVolumes, get_mounted_volumes logger = logging.getLogger(__name__) @@ -216,10 +222,8 @@ async def get_container_logs( description="Enabling this parameter will include timestamps in logs", ), shared_store: SharedStore = Depends(get_shared_store), -) -> Union[str, Dict[str, Any]]: +) -> List[str]: """Returns the logs of a given container if found""" - # TODO: remove from here and dump directly into the logs of this service - # do this in PR#1887 _raise_if_container_is_missing(id, shared_store.container_names) async with docker_client() as docker: @@ -229,7 +233,7 @@ async def get_container_logs( if timestamps: args["timestamps"] = True - container_logs: str = await container_instance.log(**args) + container_logs: List[str] = await container_instance.log(**args) return container_logs @@ -314,4 +318,85 @@ async def inspect_container( return inspect_result +@containers_router.post( + "/containers/state:restore", + summary="Restores the state of the dynamic service", + response_model=None, + status_code=status.HTTP_204_NO_CONTENT, +) +async def restore_state() -> Response: + """ + When restoring the state: + - pull inputs via nodeports + - pull all the extra state paths + """ + mounted_volumes: MountedVolumes = get_mounted_volumes() + + awaitables: Deque[Awaitable[Optional[Any]]] = deque() + + for state_path in mounted_volumes.disk_state_paths(): + logger.debug("Downloading state %s", state_path) + awaitables.append(pull_path_if_exists(state_path)) + + await logged_gather(*awaitables) + + # SEE https://github.com/tiangolo/fastapi/issues/2253 + return Response(status_code=status.HTTP_204_NO_CONTENT) + + +@containers_router.post( + "/containers/state:save", + summary="Stores the state of the dynamic service", + response_model=None, + status_code=status.HTTP_204_NO_CONTENT, +) +async def save_state() -> Response: + mounted_volumes: 
MountedVolumes = get_mounted_volumes() + + awaitables: Deque[Awaitable[Optional[Any]]] = deque() + + for state_path in mounted_volumes.disk_state_paths(): + logger.debug("Saving state %s", state_path) + awaitables.append(upload_path_if_exists(state_path)) + + await logged_gather(*awaitables) + + # SEE https://github.com/tiangolo/fastapi/issues/2253 + return Response(status_code=status.HTTP_204_NO_CONTENT) + + +@containers_router.post( + "/containers/ports/inputs:pull", + summary="Pull input ports data", + response_model=None, + status_code=status.HTTP_200_OK, +) +async def pull_input_ports(port_keys: Optional[List[str]] = None) -> int: + port_keys = [] if port_keys is None else port_keys + mounted_volumes: MountedVolumes = get_mounted_volumes() + + transferred_bytes = await nodeports.download_inputs( + mounted_volumes.disk_inputs_path, port_keys=port_keys + ) + return transferred_bytes + + +@containers_router.post( + "/containers/ports/outputs:push", + summary="Push output ports data", + response_model=None, + status_code=status.HTTP_204_NO_CONTENT, +) +async def push_output_ports(port_keys: Optional[List[str]] = None) -> Response: + port_keys = [] if port_keys is None else port_keys + mounted_volumes: MountedVolumes = get_mounted_volumes() + + await nodeports.upload_outputs( + mounted_volumes.disk_outputs_path, port_keys=port_keys + ) + + # SEE https://github.com/tiangolo/fastapi/issues/2253 + return Response(status_code=status.HTTP_204_NO_CONTENT) + + __all__ = ["containers_router"] diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/mocked.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/mocked.py deleted file mode 100644 index 6c96d172519..00000000000 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/mocked.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -All the functions is this module are mocked out -because they are called by the frontend. -Avoids raising errors in the service. 
-""" - -import logging - -from fastapi import APIRouter - -logger = logging.getLogger(__name__) - -mocked_router = APIRouter(tags=["Mocked frontend calls"]) - - -@mocked_router.post("/push") -async def ignored_push_post() -> str: - logger.warning("ignoring call POST /push from frontend") - return "" - - -@mocked_router.get("/retrieve") -async def ignored_port_data_load() -> str: - logger.warning("ignoring call GET /retrive from frontend") - return "" - - -@mocked_router.post("/retrieve") -async def ignored_port_data_save() -> str: - logger.warning("ignoring call POST /retrive from frontend") - return "" - - -@mocked_router.get("/state") -async def ignored_load_service_state_state() -> str: - logger.warning("ignoring call GET /state from frontend") - return "" - - -@mocked_router.post("/state") -async def ignored_save_service_state_state() -> str: - logger.warning("ignoring call POST /state from frontend") - return "" - - -__all__ = ["mocked_router"] diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py index 10a577cdb3b..0d46ac8726d 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py @@ -3,10 +3,14 @@ from fastapi import FastAPI -from .._meta import __version__, api_vtag +from .._meta import API_VTAG, __version__ from ..api import main_router from ..models.domains.shared_store import SharedStore from ..models.schemas.application_health import ApplicationHealth +from ..modules.directory_watcher import ( + setup_directory_watcher, + teardown_directory_watcher, +) from .remote_debug import setup as remote_debug_setup from .settings import DynamicSidecarSettings from .shared_handlers import on_shutdown_handler @@ -46,8 +50,8 @@ def assemble_application() -> FastAPI: logger.debug(dynamic_sidecar_settings.json(indent=2)) 
application = FastAPI( - debug=dynamic_sidecar_settings.debug, - openapi_url=f"/api/{api_vtag}/openapi.json", + debug=dynamic_sidecar_settings.DEBUG, + openapi_url=f"/api/{API_VTAG}/openapi.json", docs_url="/dev/doc", ) @@ -69,13 +73,15 @@ def create_start_app_handler( app: FastAPI, ) -> Callable[[], Coroutine[Any, Any, None]]: async def on_startup() -> None: - await login_registry(app.state.settings.registry) + await setup_directory_watcher() + await login_registry(app.state.settings.REGISTRY_SETTINGS) print(WELCOME_MSG, flush=True) return on_startup # setting up handler for lifecycle async def on_shutdown() -> None: + await teardown_directory_watcher() await on_shutdown_handler(application) logger.info("shutdown cleanup completed") diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py index 2888be439b0..897ec6126ee 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py @@ -1,8 +1,14 @@ import logging -from typing import Any, Optional +from functools import lru_cache +from pathlib import Path +from typing import Any, List, Optional from models_library.basic_types import BootModeEnum, PortInt -from pydantic import BaseSettings, Field, PositiveInt, validator +from models_library.projects import ProjectID +from models_library.projects_nodes import NodeID +from models_library.users import UserID +from pydantic import Field, PositiveInt, validator +from settings_library.base import BaseSettings from settings_library.docker_registry import RegistrySettings @@ -10,20 +16,19 @@ class DynamicSidecarSettings(BaseSettings): @classmethod def create(cls, **settings_kwargs: Any) -> "DynamicSidecarSettings": return cls( - registry=RegistrySettings(), + REGISTRY_SETTINGS=RegistrySettings(), **settings_kwargs, ) - boot_mode: Optional[BootModeEnum] 
= Field( + SC_BOOT_MODE: Optional[BootModeEnum] = Field( ..., description="boot mode helps determine if in development mode or normal operation", - env="SC_BOOT_MODE", ) # LOGGING - log_level_name: str = Field("DEBUG", env="LOG_LEVEL") + LOG_LEVEL: str = Field("DEBUG") - @validator("log_level_name") + @validator("LOG_LEVEL") @classmethod def match_logging_level(cls, v: str) -> str: try: @@ -33,15 +38,15 @@ def match_logging_level(cls, v: str) -> str: return v.upper() # SERVICE SERVER (see : https://www.uvicorn.org/settings/) - host: str = Field( + DYNAMIC_SIDECAR_HOST: str = Field( "0.0.0.0", # nosec description="host where to bind the application on which to serve", ) - port: PortInt = Field( + DYNAMIC_SIDECAR_PORT: PortInt = Field( 8000, description="port where the server will be currently serving" ) - compose_namespace: str = Field( + DYNAMIC_SIDECAR_COMPOSE_NAMESPACE: str = Field( ..., description=( "To avoid collisions when scheduling on the same node, this " @@ -49,11 +54,11 @@ def match_logging_level(cls, v: str) -> str: ), ) - max_combined_container_name_length: PositiveInt = Field( + DYNAMIC_SIDECAR_MAX_COMBINED_CONTAINER_NAME_LENGTH: PositiveInt = Field( 63, description="the container name which will be used as hostname" ) - stop_and_remove_timeout: PositiveInt = Field( + DYNAMIC_SIDECAR_STOP_AND_REMOVE_TIMEOUT: PositiveInt = Field( 5, description=( "When receiving SIGTERM the process has 10 seconds to cleanup its children " @@ -61,31 +66,45 @@ def match_logging_level(cls, v: str) -> str: ), ) - debug: bool = Field( + DEBUG: bool = Field( False, description="If set to True the application will boot into debug mode", - env="DEBUG", ) - remote_debug_port: PortInt = Field( + DYNAMIC_SIDECAR_REMOTE_DEBUG_PORT: PortInt = Field( 3000, description="ptsvd remote debugger starting port" ) - docker_compose_down_timeout: PositiveInt = Field( + DYNAMIC_SIDECAR_DOCKER_COMPOSE_DOWN_TIMEOUT: PositiveInt = Field( 15, description="used during shutdown when containers 
swapend will be removed" ) - registry: RegistrySettings + DY_SIDECAR_PATH_INPUTS: Path = Field( + ..., description="path where to expect the inputs folder" + ) + DY_SIDECAR_PATH_OUTPUTS: Path = Field( + ..., description="path where to expect the outputs folder" + ) + DY_SIDECAR_STATE_PATHS: List[Path] = Field( + ..., description="list of additional paths to be synced" + ) + DY_SIDECAR_USER_ID: UserID + DY_SIDECAR_PROJECT_ID: ProjectID + DY_SIDECAR_NODE_ID: NodeID + + REGISTRY_SETTINGS: RegistrySettings @property def is_development_mode(self) -> bool: """If in development mode this will be True""" - return self.boot_mode is BootModeEnum.DEVELOPMENT + return self.SC_BOOT_MODE is BootModeEnum.DEVELOPMENT @property def loglevel(self) -> int: - return int(getattr(logging, self.log_level_name)) + return int(getattr(logging, self.LOG_LEVEL)) + - class Config: - case_sensitive = False - env_prefix = "DYNAMIC_SIDECAR_" +@lru_cache +def get_settings() -> DynamicSidecarSettings: + """used outside the context of a request""" + return DynamicSidecarSettings.create() diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/shared_handlers.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/shared_handlers.py index 8b10ebe0e83..73cc3399ea7 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/shared_handlers.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/shared_handlers.py @@ -22,8 +22,8 @@ async def write_file_and_run_command( async with write_to_tmp_file(file_content) as file_path: formatted_command = command.format( file_path=file_path, - project=settings.compose_namespace, - stop_and_remove_timeout=settings.stop_and_remove_timeout, + project=settings.DYNAMIC_SIDECAR_COMPOSE_NAMESPACE, + stop_and_remove_timeout=settings.DYNAMIC_SIDECAR_STOP_AND_REMOVE_TIMEOUT, ) logger.debug("Will run command\n'%s':\n%s", formatted_command, file_content) return await async_command(formatted_command, 
command_timeout) @@ -39,7 +39,7 @@ async def remove_the_compose_spec( command = ( "docker-compose -p {project} -f {file_path} " - "down --remove-orphans -t {stop_and_remove_timeout}" + "down --volumes --remove-orphans -t {stop_and_remove_timeout}" ) result = await write_file_and_run_command( settings=settings, @@ -62,6 +62,6 @@ async def on_shutdown_handler(app: FastAPI) -> None: result = await remove_the_compose_spec( shared_store=shared_store, settings=settings, - command_timeout=settings.docker_compose_down_timeout, + command_timeout=settings.DYNAMIC_SIDECAR_DOCKER_COMPOSE_DOWN_TIMEOUT, ) logging.info("Container removal did_succeed=%s\n%s", result[0], result[1]) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py index 1dadd2d78e1..6026ee8657f 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py @@ -20,15 +20,15 @@ logger = logging.getLogger(__name__) -async def login_registry(settings: RegistrySettings) -> None: - def create_docker_config_file(settings: RegistrySettings) -> None: +async def login_registry(registry_settings: RegistrySettings) -> None: + def create_docker_config_file(registry_settings: RegistrySettings) -> None: + user = registry_settings.REGISTRY_USER + password = registry_settings.REGISTRY_PW.get_secret_value() docker_config = { "auths": { - f"{settings.resolved_registry_url}": { + f"{registry_settings.resolved_registry_url}": { "auth": base64.b64encode( - f"{settings.REGISTRY_USER}:{settings.REGISTRY_PW.get_secret_value()}".encode( - "utf-8" - ) + f"{user}:{password}".encode("utf-8") ).decode("utf-8") } } @@ -37,9 +37,9 @@ def create_docker_config_file(settings: RegistrySettings) -> None: conf_file.parent.mkdir(exist_ok=True, parents=True) conf_file.write_text(json.dumps(docker_config)) - if settings.REGISTRY_AUTH: + 
if registry_settings.REGISTRY_AUTH: await asyncio.get_event_loop().run_in_executor( - None, create_docker_config_file, settings + None, create_docker_config_file, registry_settings ) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/validation.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/validation.py index 4f12d63ca84..28fa38b02d2 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/validation.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/validation.py @@ -6,6 +6,7 @@ import yaml +from ..modules.mounted_fs import MountedVolumes, get_mounted_volumes from .settings import DynamicSidecarSettings from .shared_handlers import write_file_and_run_command @@ -25,14 +26,14 @@ def _assemble_container_name( index: int, ) -> str: strings_to_use = [ - settings.compose_namespace, + settings.DYNAMIC_SIDECAR_COMPOSE_NAMESPACE, str(index), user_given_container_name, service_key, ] container_name = "-".join([x for x in strings_to_use if len(x) > 0])[ - : settings.max_combined_container_name_length + : settings.DYNAMIC_SIDECAR_MAX_COMBINED_CONTAINER_NAME_LENGTH ].replace("_", "-") return container_name @@ -181,6 +182,7 @@ async def validate_compose_spec( spec_services_to_container_name: Dict[str, str] = {} + mounted_volumes: MountedVolumes = get_mounted_volumes() spec_services = parsed_compose_spec["services"] for index, service in enumerate(spec_services): service_content = spec_services[service] @@ -201,6 +203,28 @@ async def validate_compose_spec( settings_env_vars=service_settings_env_vars, ) + # inject paths to be mounted + service_volumes = service_content.get("volumes", []) + + service_volumes.append(mounted_volumes.get_inputs_docker_volume()) + service_volumes.append(mounted_volumes.get_outputs_docker_volume()) + for ( + state_paths_docker_volume + ) in mounted_volumes.get_state_paths_docker_volumes(): + service_volumes.append(state_paths_docker_volume) + + 
service_content["volumes"] = service_volumes + + # inject volumes creation in spec + volumes = parsed_compose_spec.get("volumes", {}) + + volumes[mounted_volumes.volume_name_inputs] = dict(external=True) + volumes[mounted_volumes.volume_name_outputs] = dict(external=True) + for volume_name_state_path in mounted_volumes.volume_name_state_paths(): + volumes[volume_name_state_path] = dict(external=True) + + parsed_compose_spec["volumes"] = volumes + # if more then one container is defined, add an "backend" network if len(spec_services) > 1: _inject_backend_networking(parsed_compose_spec) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/__init__.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py new file mode 100644 index 00000000000..f4831cae1fc --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py @@ -0,0 +1,79 @@ +import logging +import shutil +import tempfile +from contextlib import asynccontextmanager +from pathlib import Path +from typing import AsyncIterator + +from servicelib.archiving_utils import archive_dir +from servicelib.pools import async_on_threadpool +from simcore_sdk.node_data import data_manager +from simcore_service_dynamic_sidecar.core.settings import ( + DynamicSidecarSettings, + get_settings, +) + +logger = logging.getLogger(__name__) + + +async def pull_path_if_exists(path: Path) -> None: + """ + If the path already exist in storage pull it. Otherwise it is assumed + this is the first time the service starts. 
+ + In each and every other case an error is raised and logged + """ + settings: DynamicSidecarSettings = get_settings() + + if not await data_manager.is_file_present_in_storage( + user_id=settings.DY_SIDECAR_USER_ID, + project_id=str(settings.DY_SIDECAR_PROJECT_ID), + node_uuid=str(settings.DY_SIDECAR_NODE_ID), + file_path=path, + ): + logger.info( + "File '%s' is not present in storage service, will skip.", str(path) + ) + return + + await data_manager.pull( + user_id=settings.DY_SIDECAR_USER_ID, + project_id=str(settings.DY_SIDECAR_PROJECT_ID), + node_uuid=str(settings.DY_SIDECAR_NODE_ID), + file_or_folder=path, + ) + logger.info("Finished pulling and extracting %s", str(path)) + + +@asynccontextmanager +async def _isolated_temp_zip_path(path_to_compress: Path) -> AsyncIterator[Path]: + base_dir = Path(tempfile.mkdtemp()) + zip_temp_name = base_dir / f"{path_to_compress.name}.zip" + try: + yield zip_temp_name + finally: + await async_on_threadpool(lambda: shutil.rmtree(base_dir, ignore_errors=True)) + + +async def upload_path_if_exists(path: Path) -> None: + """ + Zips the path in a temporary directory and uploads to storage + """ + settings: DynamicSidecarSettings = get_settings() + # pylint: disable=unnecessary-comprehension + logger.info("Files in %s: %s", path, [x for x in path.rglob("*")]) + + async with _isolated_temp_zip_path(path) as archive_path: + await archive_dir( + dir_to_compress=path, + destination=archive_path, + compress=False, + store_relative_path=True, + ) + await data_manager.push( + user_id=settings.DY_SIDECAR_USER_ID, + project_id=str(settings.DY_SIDECAR_PROJECT_ID), + node_uuid=str(settings.DY_SIDECAR_NODE_ID), + file_or_folder=path, + ) + logger.info("Finished upload of %s", path) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py new file mode 100644 index 00000000000..2ff2ece79ca --- /dev/null 
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py @@ -0,0 +1,192 @@ +import asyncio +import logging +import time +from asyncio import AbstractEventLoop +from collections import deque +from functools import wraps +from os import name +from pathlib import Path +from typing import Any, Awaitable, Callable, Deque, Optional + +from servicelib.utils import logged_gather +from simcore_service_dynamic_sidecar.modules.nodeports import ( + dispatch_update_for_directory, +) +from watchdog.events import FileSystemEvent, FileSystemEventHandler +from watchdog.observers import Observer + +from .mounted_fs import MountedVolumes, setup_mounted_fs + +DETECTION_INTERVAL: float = 1.0 +TASK_NAME_FOR_CLEANUP = f"{name}.InvokeTask" + +logger = logging.getLogger(__name__) + +_dir_watcher: Optional["DirectoryWatcherObservers"] = None + + +class AsyncLockedFloat: + __slots__ = ("_lock", "_value") + + def __init__(self, initial_value: Optional[float] = None): + self._lock = asyncio.Lock() + self._value: Optional[float] = initial_value + + async def set_value(self, value: float) -> None: + async with self._lock: + self._value = value + + async def get_value(self) -> Optional[float]: + async with self._lock: + return self._value + + +def async_run_once_after_event_chain( + detection_interval: float, +) -> Callable[[Any], Optional[Any]]: + """ + The function's call is delayed by a period equal to the + `detection_interval` and multiple calls during this + interval will be ignored and will reset the delay. 
+ + param: detection_interval the amount of time between + returns: decorator to be applied to async functions + """ + + def internal( + decorated_function: Callable[[Any], Optional[Any]] + ) -> Callable[[Any], Optional[Any]]: + last = AsyncLockedFloat(initial_value=None) + + @wraps(decorated_function) + async def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]: + # skipping the first time the event chain starts + if await last.get_value() is None: + await last.set_value(time.time()) + return None + + await last.set_value(time.time()) + + last_read = await last.get_value() + await asyncio.sleep(detection_interval) + + if last_read == await last.get_value(): + return await decorated_function(*args, **kwargs) # type: ignore + + return None + + return wrapper + + return internal + + +async def push_directory_via_nodeports(directory_path: Path) -> None: + await dispatch_update_for_directory(directory_path) + + +@async_run_once_after_event_chain(detection_interval=DETECTION_INTERVAL) +async def invoke_push_directory_via_nodeports(directory_path: Path) -> None: + await push_directory_via_nodeports(directory_path) + + +def trigger_async_invoke_push_mapped_data( + loop: AbstractEventLoop, directory_path: Path +) -> None: + loop.create_task( + invoke_push_directory_via_nodeports(directory_path), name=TASK_NAME_FOR_CLEANUP + ) + + +class UnifyingEventHandler(FileSystemEventHandler): + def __init__(self, loop: AbstractEventLoop, directory_path: Path): + super().__init__() + + self.loop: AbstractEventLoop = loop + self.directory_path: Path = directory_path + + def on_any_event(self, event: FileSystemEvent) -> None: + super().on_any_event(event) + trigger_async_invoke_push_mapped_data(self.loop, self.directory_path) + + +class DirectoryWatcherObservers: + """Used to keep track of observer threads""" + + def __init__( + self, + ) -> None: + self._observers: Deque[Observer] = deque() + + self._keep_running: bool = True + self._blocking_task: Optional[Awaitable[Any]] = None + + 
def observe_directory(self, directory_path: Path, recursive: bool = True) -> None: + path = directory_path.absolute() + outputs_event_handle = UnifyingEventHandler( + loop=asyncio.get_event_loop(), directory_path=path + ) + observer = Observer() + observer.schedule(outputs_event_handle, str(path), recursive=recursive) + self._observers.append(observer) + + async def _runner(self) -> None: + try: + for observer in self._observers: + observer.start() + + while self._keep_running: + # watchdog internally uses 1 sec interval to detect events + # sleeping for less is useless. + # If this value is bigger then the DETECTION_INTERVAL + # the result will not be as expected. Keep sleep to 1 second + await asyncio.sleep(1) + + except Exception: # pylint: disable=broad-except + logger.exception("Watchers failed upon initialization") + finally: + for observer in self._observers: + observer.stop() + observer.join() + + def start(self) -> None: + if self._blocking_task is None: + self._blocking_task = asyncio.create_task(self._runner()) + else: + logger.warning("Already started, will not start again") + + async def stop(self) -> None: + """cleans up spawned tasks which might be pending""" + self._keep_running = False + if self._blocking_task: + try: + await self._blocking_task + self._blocking_task = None + except asyncio.CancelledError: + logger.info("Task was already cancelled") + + # cleanup pending tasks to avoid errors + tasks_to_await: Deque[Awaitable[Any]] = deque() + for task in asyncio.all_tasks(): + if task.get_name() == TASK_NAME_FOR_CLEANUP: + tasks_to_await.append(task) + + # awaiting pending spawned tasks will not raise warnings + await logged_gather(*tasks_to_await) + + +async def setup_directory_watcher() -> None: + global _dir_watcher # pylint: disable=global-statement + + mounted_volumes: MountedVolumes = setup_mounted_fs() + + _dir_watcher = DirectoryWatcherObservers() + _dir_watcher.observe_directory(mounted_volumes.disk_outputs_path) + _dir_watcher.start() + 
+ + +async def teardown_directory_watcher() -> None: + if _dir_watcher is not None: + await _dir_watcher.stop() + + +__all__ = ["setup_directory_watcher", "teardown_directory_watcher"] diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/mounted_fs.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/mounted_fs.py new file mode 100644 index 00000000000..0f9209b3fa5 --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/mounted_fs.py @@ -0,0 +1,120 @@ +import os +from functools import cached_property +from pathlib import Path +from typing import Generator, List, Optional + +from simcore_service_dynamic_sidecar.core.settings import ( + DynamicSidecarSettings, + get_settings, +) + +DY_VOLUMES = Path("/dy-volumes") + +_mounted_volumes: Optional["MountedVolumes"] = None + + +def _ensure_path(path: Path) -> Path: + path.mkdir(parents=True, exist_ok=True) + return path + + +def _name_from_full_path(path: Path) -> str: + """transforms: /path/to/a/file -> _path_to_a_file""" + return str(path).replace(os.sep, "_") + + +class MountedVolumes: + """ + The inputs and outputs directories are created by the dynamic-sidecar + and mounted into all started containers at the specified path. + + Locally, on its disk, the dynamic-sidecar ensures the `inputs` and + `outputs` directories are created in the external volume of name + `dy-sidecar_UUID` in the `/dy-volumes` path. 
+ Eg: - /dy-sidecar_UUID_inputs:/inputs-dir + - /dy-sidecar_UUID_outputs:/outputs-dir + """ + + def __init__( + self, inputs_path: Path, outputs_path: Path, state_paths: List[Path] + ) -> None: + self.inputs_path: Path = inputs_path + self.outputs_path: Path = outputs_path + self.state_paths: List[Path] = state_paths + + self._ensure_directories() + + @cached_property + def volume_name_inputs(self) -> str: + """Same name as the namespace, to easily track components""" + compose_namespace = get_settings().DYNAMIC_SIDECAR_COMPOSE_NAMESPACE + return f"{compose_namespace}_inputs" + + @cached_property + def volume_name_outputs(self) -> str: + compose_namespace = get_settings().DYNAMIC_SIDECAR_COMPOSE_NAMESPACE + return f"{compose_namespace}_outputs" + + def volume_name_state_paths(self) -> Generator[str, None, None]: + compose_namespace = get_settings().DYNAMIC_SIDECAR_COMPOSE_NAMESPACE + for state_path in self.state_paths: + yield f"{compose_namespace}{_name_from_full_path(state_path)}" + + @cached_property + def disk_inputs_path(self) -> Path: + return _ensure_path(DY_VOLUMES / "inputs") + + @cached_property + def disk_outputs_path(self) -> Path: + return _ensure_path(DY_VOLUMES / "outputs") + + def disk_state_paths(self) -> Generator[Path, None, None]: + for state_path in self.state_paths: + yield _ensure_path(DY_VOLUMES / _name_from_full_path(state_path).strip("_")) + + def _ensure_directories(self) -> None: + """ + Creates the directories on its file system, + these will be mounted elsewhere. 
+ """ + _ensure_path(DY_VOLUMES) + self.disk_inputs_path # pylint:disable= pointless-statement + self.disk_outputs_path # pylint:disable= pointless-statement + set(self.disk_state_paths()) + + def get_inputs_docker_volume(self) -> str: + return f"{self.volume_name_inputs}:{self.inputs_path}" + + def get_outputs_docker_volume(self) -> str: + return f"{self.volume_name_outputs}:{self.outputs_path}" + + def get_state_paths_docker_volumes(self) -> Generator[str, None, None]: + for volume_state_path, state_path in zip( + self.volume_name_state_paths(), self.state_paths + ): + yield f"{volume_state_path}:{state_path}" + + +def setup_mounted_fs() -> MountedVolumes: + global _mounted_volumes # pylint: disable=global-statement + + settings: DynamicSidecarSettings = get_settings() + + _mounted_volumes = MountedVolumes( + inputs_path=settings.DY_SIDECAR_PATH_INPUTS, + outputs_path=settings.DY_SIDECAR_PATH_OUTPUTS, + state_paths=settings.DY_SIDECAR_STATE_PATHS, + ) + + return _mounted_volumes + + +def get_mounted_volumes() -> MountedVolumes: + if _mounted_volumes is None: + raise RuntimeError( + f"{MountedVolumes.__name__} was not initialized, did not call setup" + ) + return _mounted_volumes + + +__all__ = ["get_mounted_volumes", "MountedVolumes"] diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py new file mode 100644 index 00000000000..72e860956e4 --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -0,0 +1,246 @@ +import json +import logging +import shutil +import sys +import tempfile +import time +import zipfile +from collections import deque +from pathlib import Path +from typing import Coroutine, Deque, Dict, List, Optional, Set, Tuple, cast + +from pydantic import ByteSize +from servicelib.archiving_utils import PrunableFolder, archive_dir, unarchive_dir +from servicelib.async_utils import 
run_sequentially_in_context +from servicelib.pools import async_on_threadpool +from servicelib.utils import logged_gather +from simcore_sdk import node_ports_v2 +from simcore_sdk.node_ports_v2 import Nodeports, Port +from simcore_sdk.node_ports_v2.links import ItemConcreteValue +from simcore_service_dynamic_sidecar.core.settings import ( + DynamicSidecarSettings, + get_settings, +) + +_FILE_TYPE_PREFIX = "data:" +_KEY_VALUE_FILE_NAME = "key_values.json" + +logger = logging.getLogger(__name__) + +# OUTPUTS section + + +def _get_size_of_value(value: ItemConcreteValue) -> int: + if isinstance(value, Path): + size_bytes = value.stat().st_size + return size_bytes + return sys.getsizeof(value) + + +@run_sequentially_in_context() +async def upload_outputs(outputs_path: Path, port_keys: List[str]) -> None: + """calls to this function will get queued and invoked in sequence""" + # pylint: disable=too-many-branches + logger.info("uploading data to simcore...") + start_time = time.perf_counter() + + settings: DynamicSidecarSettings = get_settings() + PORTS: Nodeports = await node_ports_v2.ports( + user_id=settings.DY_SIDECAR_USER_ID, + project_id=str(settings.DY_SIDECAR_PROJECT_ID), + node_uuid=str(settings.DY_SIDECAR_NODE_ID), + ) + + # let's gather the tasks + temp_files: List[Path] = [] + ports_values: Dict[str, ItemConcreteValue] = {} + archiving_tasks: Deque[Coroutine[None, None, None]] = deque() + + for port in (await PORTS.outputs).values(): + logger.info("Checking port %s", port.key) + if port_keys and port.key not in port_keys: + continue + logger.debug( + "uploading data to port '%s' with value '%s'...", port.key, port.value + ) + if _FILE_TYPE_PREFIX in port.property_type: + src_folder = outputs_path / port.key + files_and_folders_list = list(src_folder.rglob("*")) + + if not files_and_folders_list: + ports_values[port.key] = None + continue + + if len(files_and_folders_list) == 1 and files_and_folders_list[0].is_file(): + # special case, direct upload + 
ports_values[port.key] = files_and_folders_list[0] + continue + + # generic case let's create an archive + # only the filtered out files will be zipped + tmp_file = Path(tempfile.mkdtemp()) / f"{src_folder.stem}.zip" + temp_files.append(tmp_file) + + # when having multiple directories it is important to + # run the compression in parallel to guarantee better performance + archiving_tasks.append( + archive_dir( + dir_to_compress=src_folder, + destination=tmp_file, + compress=False, + store_relative_path=True, + ) + ) + ports_values[port.key] = tmp_file + else: + data_file = outputs_path / _KEY_VALUE_FILE_NAME + if data_file.exists(): + data = json.loads(data_file.read_text()) + if port.key in data and data[port.key] is not None: + ports_values[port.key] = data[port.key] + else: + logger.debug("Port %s not found in %s", port.key, data) + else: + logger.debug("No file %s to fetch port values from", data_file) + + try: + if archiving_tasks: + await logged_gather(*archiving_tasks) + await PORTS.set_multiple(ports_values) + finally: + # clean up possible compressed files + for file_path in temp_files: + await async_on_threadpool( + # pylint: disable=cell-var-from-loop + lambda: shutil.rmtree(file_path.parent, ignore_errors=True) + ) + + elapsed_time = time.perf_counter() - start_time + total_bytes = sum([_get_size_of_value(x) for x in ports_values.values()]) + logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) + + +async def dispatch_update_for_directory(directory_path: Path) -> None: + logger.info("Uploading data for directory %s", directory_path) + # TODO: how to figure out from directory_path which is the correct target to upload + await upload_outputs(directory_path, []) + + +# INPUTS section + + +async def _get_data_from_port(port: Port) -> Tuple[Port, ItemConcreteValue]: + tag = f"[{port.key}] " + logger.info("%s transfer started for %s", tag, port.key) + start_time = time.perf_counter() + ret = await port.get() + elapsed_time = 
time.perf_counter() - start_time + logger.info("%s transfer completed (=%s) in %3.2fs", tag, ret, elapsed_time) + if isinstance(ret, Path): + size_mb = ret.stat().st_size / 1024 / 1024 + logger.info( + "%s %s: data size: %sMB, transfer rate %sMB/s", + tag, + ret.name, + size_mb, + size_mb / elapsed_time, + ) + return (port, ret) + + +async def download_inputs(inputs_path: Path, port_keys: List[str]) -> ByteSize: + logger.info("retrieving data from simcore...") + start_time = time.perf_counter() + + settings: DynamicSidecarSettings = get_settings() + PORTS: Nodeports = await node_ports_v2.ports( + user_id=settings.DY_SIDECAR_USER_ID, + project_id=str(settings.DY_SIDECAR_PROJECT_ID), + node_uuid=str(settings.DY_SIDECAR_NODE_ID), + ) + data = {} + + # let's gather all the data + download_tasks = [] + for node_input in (await PORTS.inputs).values(): + # if port_keys contains some keys only download them + logger.info("Checking node %s", node_input.key) + if port_keys and node_input.key not in port_keys: + continue + # collect coroutines + download_tasks.append(_get_data_from_port(node_input)) + logger.info("retrieving %s data", len(download_tasks)) + + transfer_bytes = 0 + if download_tasks: + # TODO: limit concurrency to avoid saturating storage+db?? 
+ results: List[Tuple[Port, ItemConcreteValue]] = cast( + List[Tuple[Port, ItemConcreteValue]], await logged_gather(*download_tasks) + ) + logger.info("completed download %s", results) + for port, value in results: + + data[port.key] = {"key": port.key, "value": value} + + if _FILE_TYPE_PREFIX in port.property_type: + + # if there are files, move them to the final destination + downloaded_file: Optional[Path] = cast(Optional[Path], value) + dest_path: Path = inputs_path / port.key + + if not downloaded_file or not downloaded_file.exists(): + # the link may be empty + continue + + transfer_bytes = transfer_bytes + downloaded_file.stat().st_size + + # in case of valid file, it is either uncompressed and/or moved to the final directory + logger.info("creating directory %s", dest_path) + dest_path.mkdir(exist_ok=True, parents=True) + data[port.key] = {"key": port.key, "value": str(dest_path)} + + if zipfile.is_zipfile(downloaded_file): + + dest_folder = PrunableFolder(dest_path) + + # unzip updated data to dest_path + logger.info("unzipping %s", downloaded_file) + unarchived: Set[Path] = await unarchive_dir( + archive_to_extract=downloaded_file, destination_folder=dest_path + ) + + dest_folder.prune(exclude=unarchived) + + logger.info("all unzipped in %s", dest_path) + else: + logger.info("moving %s", downloaded_file) + dest_path = dest_path / Path(downloaded_file).name + await async_on_threadpool( + # pylint: disable=cell-var-from-loop + lambda: shutil.move(str(downloaded_file), dest_path) + ) + logger.info("all moved to %s", dest_path) + else: + transfer_bytes = transfer_bytes + sys.getsizeof(value) + + # create/update the json file with the new values + if data: + data_file = inputs_path / _KEY_VALUE_FILE_NAME + if data_file.exists(): + current_data = json.loads(data_file.read_text()) + # merge data + data = {**current_data, **data} + data_file.write_text(json.dumps(data)) + + transferred = ByteSize(transfer_bytes) + elapsed_time = time.perf_counter() - start_time + 
logger.info( + "Downloaded %s in %s seconds", + transferred.human_readable(decimal=True), + elapsed_time, + ) + + return transferred + + +__all__ = ["dispatch_update_for_directory", "upload_outputs", "download_inputs"] diff --git a/services/dynamic-sidecar/tests/conftest.py b/services/dynamic-sidecar/tests/conftest.py index 958fedf1dcb..60274f49d8c 100644 --- a/services/dynamic-sidecar/tests/conftest.py +++ b/services/dynamic-sidecar/tests/conftest.py @@ -1,11 +1,15 @@ # pylint: disable=unused-argument # pylint: disable=redefined-outer-name +import asyncio +import json import os import random import sys +import tempfile +import uuid from pathlib import Path -from typing import Any, AsyncGenerator +from typing import Any, AsyncGenerator, Iterator, List from unittest import mock import aiodocker @@ -18,23 +22,99 @@ from simcore_service_dynamic_sidecar.core.shared_handlers import ( write_file_and_run_command, ) +from simcore_service_dynamic_sidecar.core.utils import docker_client from simcore_service_dynamic_sidecar.models.domains.shared_store import SharedStore +from simcore_service_dynamic_sidecar.modules import mounted_fs -@pytest.fixture(scope="module", autouse=True) -def app() -> FastAPI: +@pytest.fixture(scope="module") +def mock_dy_volumes() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + +@pytest.fixture(scope="session") +def io_temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + +@pytest.fixture(scope="session") +def compose_namespace() -> str: + return f"dy-sidecar_{uuid.uuid4()}" + + +@pytest.fixture(scope="session") +def inputs_dir(io_temp_dir: Path) -> Path: + return io_temp_dir / "inputs" + + +@pytest.fixture(scope="session") +def outputs_dir(io_temp_dir: Path) -> Path: + return io_temp_dir / "outputs" + + +@pytest.fixture(scope="session") +def state_paths_dirs(io_temp_dir: Path) -> List[Path]: + return [io_temp_dir / f"dir_{x}" for x in range(4)] + + 
+@pytest.fixture(scope="module") +def mock_environment( + mock_dy_volumes: Path, + compose_namespace: str, + inputs_dir: Path, + outputs_dir: Path, + state_paths_dirs: List[Path], +) -> Iterator[None]: with mock.patch.dict( os.environ, { "SC_BOOT_MODE": "production", - "DYNAMIC_SIDECAR_compose_namespace": "test-space", - "REGISTRY_auth": "false", - "REGISTRY_user": "test", + "DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace, + "REGISTRY_AUTH": "false", + "REGISTRY_USER": "test", "REGISTRY_PW": "test", "REGISTRY_SSL": "false", + "DY_SIDECAR_PATH_INPUTS": str(inputs_dir), + "DY_SIDECAR_PATH_OUTPUTS": str(outputs_dir), + "DY_SIDECAR_STATE_PATHS": json.dumps([str(x) for x in state_paths_dirs]), + "DY_SIDECAR_USER_ID": "1", + "DY_SIDECAR_PROJECT_ID": f"{uuid.uuid4()}", + "DY_SIDECAR_NODE_ID": f"{uuid.uuid4()}", }, - ): - return assemble_application() + ), mock.patch.object(mounted_fs, "DY_VOLUMES", mock_dy_volumes): + print(os.environ) + yield + + +@pytest.fixture(scope="module") +def app(mock_environment: None) -> FastAPI: + return assemble_application() + + +@pytest.fixture +async def ensure_external_volumes( + compose_namespace: str, state_paths_dirs: List[Path] +) -> AsyncGenerator[None, None]: + """ensures inputs and outputs volumes for the service are present""" + + volume_names = [f"{compose_namespace}_inputs", f"{compose_namespace}_outputs"] + for state_paths_dir in state_paths_dirs: + name_from_path = str(state_paths_dir).replace(os.sep, "_") + volume_names.append(f"{compose_namespace}{name_from_path}") + + async with docker_client() as client: + volumes = await asyncio.gather( + *[ + client.volumes.create({"Name": volume_name}) + for volume_name in volume_names + ] + ) + + yield + + await asyncio.gather(*[volume.delete() for volume in volumes]) @pytest.fixture @@ -44,7 +124,9 @@ async def test_client(app: FastAPI) -> TestClient: @pytest.fixture(autouse=True) -async def cleanup_containers(app: FastAPI) -> AsyncGenerator[None, None]: +async def 
cleanup_containers( + app: FastAPI, ensure_external_volumes: None +) -> AsyncGenerator[None, None]: yield # run docker compose down here diff --git a/services/dynamic-sidecar/tests/unit/test_api_containers.py b/services/dynamic-sidecar/tests/unit/test_api_containers.py index 5f39d4c2cbd..2748d26e3d6 100644 --- a/services/dynamic-sidecar/tests/unit/test_api_containers.py +++ b/services/dynamic-sidecar/tests/unit/test_api_containers.py @@ -2,6 +2,7 @@ # pylint: disable=unused-argument +import importlib import json from typing import Any, Dict, List @@ -10,7 +11,8 @@ import yaml from async_asgi_testclient import TestClient from fastapi import status -from simcore_service_dynamic_sidecar._meta import api_vtag +from pytest_mock.plugin import MockerFixture +from simcore_service_dynamic_sidecar._meta import API_VTAG from simcore_service_dynamic_sidecar.core.settings import DynamicSidecarSettings from simcore_service_dynamic_sidecar.core.shared_handlers import ( write_file_and_run_command, @@ -23,6 +25,8 @@ pytestmark = pytest.mark.asyncio +# FIXTURES + @pytest.fixture def dynamic_sidecar_network_name() -> str: @@ -79,7 +83,9 @@ async def assert_compose_spec_pulled( docker_ps_names = await _docker_ps_a_container_names() started_containers = [ - x for x in docker_ps_names if x.startswith(settings.compose_namespace) + x + for x in docker_ps_names + if x.startswith(settings.DYNAMIC_SIDECAR_COMPOSE_NAMESPACE) ] assert len(started_containers) == expected_services_count @@ -90,7 +96,7 @@ async def started_containers(test_client: TestClient, compose_spec: str) -> List await assert_compose_spec_pulled(compose_spec, settings) # start containers - response = await test_client.post(f"/{api_vtag}/containers", data=compose_spec) + response = await test_client.post(f"/{API_VTAG}/containers", data=compose_spec) assert response.status_code == status.HTTP_202_ACCEPTED, response.text shared_store: SharedStore = test_client.application.state.shared_store @@ -106,9 +112,49 @@ def 
not_started_containers() -> List[str]: return [f"missing-container-{i}" for i in range(5)] +@pytest.fixture +def mock_nodeports(mocker: MockerFixture) -> None: + mocker.patch( + "simcore_service_dynamic_sidecar.modules.nodeports.upload_outputs", + return_value=None, + ) + mocker.patch( + "simcore_service_dynamic_sidecar.modules.nodeports.download_inputs", + return_value=42, + ) + + +@pytest.fixture +def mock_data_manager(mocker: MockerFixture) -> None: + mocker.patch( + "simcore_service_dynamic_sidecar.modules.data_manager.upload_path_if_exists", + return_value=None, + ) + mocker.patch( + "simcore_service_dynamic_sidecar.modules.data_manager.pull_path_if_exists", + return_value=None, + ) + + importlib.reload( + importlib.import_module("simcore_service_dynamic_sidecar.api.containers") + ) + + +@pytest.fixture +def mock_port_keys() -> List[str]: + return ["first_port", "second_port"] + + +# TESTS + + +def test_ensure_api_vtag_is_v1() -> None: + assert API_VTAG == "v1" + + async def test_start_containers_wrong_spec(test_client: TestClient) -> None: response = await test_client.post( - f"/{api_vtag}/containers", data={"opsie": "shame on me"} + f"/{API_VTAG}/containers", data={"opsie": "shame on me"} ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY assert response.json() == {"detail": "\nProvided yaml is not valid!"} @@ -118,10 +164,10 @@ async def test_start_same_space_twice( test_client: TestClient, compose_spec: str ) -> None: settings: DynamicSidecarSettings = test_client.application.state.settings - settings.compose_namespace = "test_name_space_1" + settings.DYNAMIC_SIDECAR_COMPOSE_NAMESPACE = "test_name_space_1" await assert_compose_spec_pulled(compose_spec, settings) - settings.compose_namespace = "test_name_space_2" + settings.DYNAMIC_SIDECAR_COMPOSE_NAMESPACE = "test_name_space_2" await assert_compose_spec_pulled(compose_spec, settings) @@ -129,7 +175,7 @@ async def test_compose_up( test_client: TestClient, compose_spec: Dict[str, Any] ) -> 
None: - response = await test_client.post(f"/{api_vtag}/containers", data=compose_spec) + response = await test_client.post(f"/{API_VTAG}/containers", data=compose_spec) assert response.status_code == status.HTTP_202_ACCEPTED, response.text shared_store: SharedStore = test_client.application.state.shared_store container_names = shared_store.container_names @@ -137,7 +183,7 @@ async def test_compose_up( async def test_compose_up_spec_not_provided(test_client: TestClient) -> None: - response = await test_client.post(f"/{api_vtag}/containers") + response = await test_client.post(f"/{API_VTAG}/containers") assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY, response.text assert response.json() == {"detail": "\nProvided yaml is not valid!"} @@ -145,7 +191,7 @@ async def test_compose_up_spec_not_provided(test_client: TestClient) -> None: async def test_compose_up_spec_invalid(test_client: TestClient) -> None: invalid_compose_spec = faker.Faker().text() # pylint: disable=no-member response = await test_client.post( - f"/{api_vtag}/containers", data=invalid_compose_spec + f"/{API_VTAG}/containers", data=invalid_compose_spec ) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY, response.text assert "Provided yaml is not valid!" 
in response.text @@ -157,14 +203,14 @@ async def test_containers_down_after_starting( test_client: TestClient, compose_spec: Dict[str, Any] ) -> None: # store spec first - response = await test_client.post(f"/{api_vtag}/containers", data=compose_spec) + response = await test_client.post(f"/{API_VTAG}/containers", data=compose_spec) assert response.status_code == status.HTTP_202_ACCEPTED, response.text shared_store: SharedStore = test_client.application.state.shared_store container_names = shared_store.container_names assert response.json() == container_names response = await test_client.post( - f"/{api_vtag}/containers:down", + f"/{API_VTAG}/containers:down", query_string=dict(command_timeout=DEFAULT_COMMAND_TIMEOUT), ) assert response.status_code == status.HTTP_200_OK, response.text @@ -175,7 +221,7 @@ async def test_containers_down_missing_spec( test_client: TestClient, compose_spec: Dict[str, Any] ) -> None: response = await test_client.post( - f"/{api_vtag}/containers:down", + f"/{API_VTAG}/containers:down", query_string=dict(command_timeout=DEFAULT_COMMAND_TIMEOUT), ) assert response.status_code == status.HTTP_404_NOT_FOUND, response.text @@ -190,9 +236,11 @@ def assert_keys_exist(result: Dict[str, Any]) -> bool: async def test_containers_get( - test_client: TestClient, started_containers: List[str] + test_client: TestClient, + started_containers: List[str], + ensure_external_volumes: None, ) -> None: - response = await test_client.get(f"/{api_vtag}/containers") + response = await test_client.get(f"/{API_VTAG}/containers") assert response.status_code == status.HTTP_200_OK, response.text decoded_response = response.json() @@ -203,10 +251,12 @@ async def test_containers_get( async def test_containers_get_status( - test_client: TestClient, started_containers: List[str] + test_client: TestClient, + started_containers: List[str], + ensure_external_volumes: None, ) -> None: response = await test_client.get( - f"/{api_vtag}/containers", 
query_string=dict(only_status=True) + f"/{API_VTAG}/containers", query_string=dict(only_status=True) ) assert response.status_code == status.HTTP_200_OK, response.text @@ -218,14 +268,14 @@ async def test_containers_get_status( async def test_containers_inspect_docker_error( test_client: TestClient, started_containers: List[str], mock_containers_get: int ) -> None: - response = await test_client.get(f"/{api_vtag}/containers") + response = await test_client.get(f"/{API_VTAG}/containers") assert response.status_code == mock_containers_get, response.text async def test_containers_docker_status_docker_error( test_client: TestClient, started_containers: List[str], mock_containers_get: int ) -> None: - response = await test_client.get(f"/{api_vtag}/containers") + response = await test_client.get(f"/{API_VTAG}/containers") assert response.status_code == mock_containers_get, response.text @@ -234,11 +284,11 @@ async def test_container_inspect_logs_remove( ) -> None: for container in started_containers: # get container logs - response = await test_client.get(f"/{api_vtag}/containers/{container}/logs") + response = await test_client.get(f"/{API_VTAG}/containers/{container}/logs") assert response.status_code == status.HTTP_200_OK, response.text # inspect container - response = await test_client.get(f"/{api_vtag}/containers/{container}") + response = await test_client.get(f"/{API_VTAG}/containers/{container}") assert response.status_code == status.HTTP_200_OK, response.text parsed_response = response.json() assert parsed_response["Name"] == f"/{container}" @@ -250,7 +300,7 @@ async def test_container_logs_with_timestamps( for container in started_containers: # get container logs response = await test_client.get( - f"/{api_vtag}/containers/{container}/logs", + f"/{API_VTAG}/containers/{container}/logs", query_string=dict(timestamps=True), ) assert response.status_code == status.HTTP_200_OK, response.text @@ -266,12 +316,12 @@ def _expected_error_string(container: str) -> 
Dict[str, str]: for container in not_started_containers: # get container logs - response = await test_client.get(f"/{api_vtag}/containers/{container}/logs") + response = await test_client.get(f"/{API_VTAG}/containers/{container}/logs") assert response.status_code == status.HTTP_404_NOT_FOUND, response.text assert response.json() == _expected_error_string(container) # inspect container - response = await test_client.get(f"/{api_vtag}/containers/{container}") + response = await test_client.get(f"/{API_VTAG}/containers/{container}") assert response.status_code == status.HTTP_404_NOT_FOUND, response.text assert response.json() == _expected_error_string(container) @@ -286,16 +336,52 @@ def _expected_error_string() -> Dict[str, str]: for container in started_containers: # get container logs - response = await test_client.get(f"/{api_vtag}/containers/{container}/logs") + response = await test_client.get(f"/{API_VTAG}/containers/{container}/logs") assert response.status_code == mock_containers_get, response.text assert response.json() == _expected_error_string() # inspect container - response = await test_client.get(f"/{api_vtag}/containers/{container}") + response = await test_client.get(f"/{API_VTAG}/containers/{container}") assert response.status_code == mock_containers_get, response.text assert response.json() == _expected_error_string() +async def test_container_save_state( + test_client: TestClient, mock_data_manager: None +) -> None: + response = await test_client.post(f"/{API_VTAG}/containers/state:save") + assert response.status_code == status.HTTP_204_NO_CONTENT, response.text + assert response.text == "" + + +async def test_container_restore_state( + test_client: TestClient, mock_data_manager: None +) -> None: + response = await test_client.post(f"/{API_VTAG}/containers/state:restore") + assert response.status_code == status.HTTP_204_NO_CONTENT, response.text + assert response.text == "" + + +async def test_container_pull_input_ports( + test_client: TestClient, 
mock_port_keys: List[str], mock_nodeports: None +) -> None: + response = await test_client.post( + f"/{API_VTAG}/containers/ports/inputs:pull", json=mock_port_keys + ) + assert response.status_code == status.HTTP_200_OK, response.text + assert response.text == "42" + + +async def test_container_push_output_ports( + test_client: TestClient, mock_port_keys: List[str], mock_nodeports: None +) -> None: + response = await test_client.post( + f"/{API_VTAG}/containers/ports/outputs:push", json=mock_port_keys + ) + assert response.status_code == status.HTTP_204_NO_CONTENT, response.text + assert response.text == "" + + def _get_entrypoint_container_name( test_client: TestClient, dynamic_sidecar_network_name: str ) -> str: @@ -317,7 +403,7 @@ async def test_containers_entrypoint_name_ok( started_containers: List[str], ) -> None: filters = json.dumps({"network": dynamic_sidecar_network_name}) - response = await test_client.get(f"/{api_vtag}/containers/name?filters={filters}") + response = await test_client.get(f"/{API_VTAG}/containers/name?filters={filters}") assert response.status_code == status.HTTP_200_OK, response.text assert response.json() == _get_entrypoint_container_name( test_client, dynamic_sidecar_network_name @@ -343,7 +429,7 @@ async def test_containers_entrypoint_name_containers_not_started( ) filters = json.dumps({"network": dynamic_sidecar_network_name}) - response = await test_client.get(f"/{api_vtag}/containers/name?filters={filters}") + response = await test_client.get(f"/{API_VTAG}/containers/name?filters={filters}") assert response.status_code == status.HTTP_404_NOT_FOUND, response.text assert response.json() == { "detail": "No container found for network=entrypoint_container_network" diff --git a/services/dynamic-sidecar/tests/unit/test_api_mocked.py b/services/dynamic-sidecar/tests/unit/test_api_mocked.py deleted file mode 100644 index f7fcd50af5a..00000000000 --- a/services/dynamic-sidecar/tests/unit/test_api_mocked.py +++ /dev/null @@ -1,35 +0,0 @@ 
-# pylint: disable=redefined-outer-name -# pylint: disable=unused-argument - - -import json - -import pytest -from async_asgi_testclient import TestClient -from async_asgi_testclient.response import Response - -pytestmark = pytest.mark.asyncio - - -def assert_200_empty(response: Response) -> bool: - assert response.status_code == 200, response.text - assert json.loads(response.text) == "" - return True - - -@pytest.mark.parametrize( - "route,method", - [ - # push api module - ("/push", "POST"), - # retrive api module - ("/retrieve", "GET"), - ("/retrieve", "POST"), - # state api module - ("/state", "GET"), - ("/state", "POST"), - ], -) -async def test_mocked_modules(test_client: TestClient, route: str, method: str) -> None: - response = await test_client.open(route, method=method) - assert assert_200_empty(response) is True diff --git a/services/dynamic-sidecar/tests/unit/test_core_settings.py b/services/dynamic-sidecar/tests/unit/test_core_settings.py new file mode 100644 index 00000000000..7e29434d8c2 --- /dev/null +++ b/services/dynamic-sidecar/tests/unit/test_core_settings.py @@ -0,0 +1,46 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + + +import uuid +from pathlib import Path, PosixPath + +import pytest +from _pytest.monkeypatch import MonkeyPatch +from simcore_service_dynamic_sidecar.core.settings import ( + DynamicSidecarSettings, + get_settings, +) + + +@pytest.fixture +def tmp_dir(tmp_path: PosixPath) -> Path: + return Path(tmp_path) + + +@pytest.fixture +def mocked_non_request_settings(tmp_dir: Path, monkeypatch: MonkeyPatch) -> None: + inputs_dir = tmp_dir / "inputs" + outputs_dir = tmp_dir / "outputs" + + monkeypatch.setenv("SC_BOOT_MODE", "production") + monkeypatch.setenv("DYNAMIC_SIDECAR_COMPOSE_NAMESPACE", "test-space") + monkeypatch.setenv("REGISTRY_AUTH", "false") + monkeypatch.setenv("REGISTRY_USER", "test") + monkeypatch.setenv("REGISTRY_PW", "test") + monkeypatch.setenv("REGISTRY_SSL", "false") + 
monkeypatch.setenv("DY_SIDECAR_PATH_INPUTS", str(inputs_dir)) + monkeypatch.setenv("DY_SIDECAR_PATH_OUTPUTS", str(outputs_dir)) + monkeypatch.setenv("DY_SIDECAR_USER_ID", "1") + monkeypatch.setenv("DY_SIDECAR_PROJECT_ID", f"{uuid.uuid4()}") + monkeypatch.setenv("DY_SIDECAR_NODE_ID", f"{uuid.uuid4()}") + + +def test_non_request_dynamic_sidecar_settings( + mocked_non_request_settings: None, +) -> None: + assert DynamicSidecarSettings.create() + + +def test_cached_settings_is_same_object(mocked_non_request_settings: None) -> None: + assert id(get_settings()) == id(get_settings()) diff --git a/services/dynamic-sidecar/tests/unit/test_modules_directory_watcher.py b/services/dynamic-sidecar/tests/unit/test_modules_directory_watcher.py new file mode 100644 index 00000000000..76f3eff9ca5 --- /dev/null +++ b/services/dynamic-sidecar/tests/unit/test_modules_directory_watcher.py @@ -0,0 +1,101 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import asyncio +from pathlib import Path +from shutil import move +from typing import Iterator +from unittest.mock import AsyncMock + +import pytest +from _pytest.monkeypatch import MonkeyPatch +from py._path.local import LocalPath +from simcore_service_dynamic_sidecar.modules import directory_watcher +from simcore_service_dynamic_sidecar.modules.directory_watcher import ( + DirectoryWatcherObservers, +) + +# TODO: +# - setup and look at a directory +# - do something on that dir when file changes +# - use a mock to check that it calls the function a certain amount of times +# - good way to check the code works properly +# - todo make it run on a separate thread, already there +# - todo use absolute patterns for monitoring + +pytestmark = pytest.mark.asyncio + +TICK_INTERVAL = 0.001 + +# FIXTURES + + +@pytest.fixture +def patch_directory_watcher(monkeypatch: MonkeyPatch) -> Iterator[AsyncMock]: + mocked_upload_data = AsyncMock(return_value=None) + + monkeypatch.setattr(directory_watcher, 
"DETECTION_INTERVAL", TICK_INTERVAL) + monkeypatch.setattr( + directory_watcher, "push_directory_via_nodeports", mocked_upload_data + ) + + yield mocked_upload_data + + +@pytest.fixture +def temp_dir(tmpdir: LocalPath) -> Path: + return Path(tmpdir) + + +# UTILS + + +async def _generate_event_burst(temp_dir: Path, subfolder: str = None) -> None: + full_dir_path = temp_dir if subfolder is None else temp_dir / subfolder + full_dir_path.mkdir(parents=True, exist_ok=True) + file_path_1 = full_dir_path / "file1.txt" + file_path_2 = full_dir_path / "file2.txt" + # create + file_path_1.touch() + # modified + file_path_1.write_text("lorem ipsum") + # move + move(str(file_path_1), str(file_path_2)) + # delete + file_path_2.unlink() + # let fs events trigger + await asyncio.sleep(TICK_INTERVAL) + + +# TESTS + + +async def test_run_observer( + patch_directory_watcher: AsyncMock, + temp_dir: Path, +) -> None: + + directory_watcher_observers = DirectoryWatcherObservers() + directory_watcher_observers.observe_directory(temp_dir) + + directory_watcher_observers.start() + directory_watcher_observers.start() + + await asyncio.sleep(TICK_INTERVAL) + + # generates the first event chain + await _generate_event_burst(temp_dir) + + await asyncio.sleep(2) + + # generates the second event chain + await _generate_event_burst(temp_dir, "ciao") + + await directory_watcher_observers.stop() + await directory_watcher_observers.stop() + + # pylint: disable=protected-access + assert directory_watcher_observers._keep_running is False + assert directory_watcher_observers._blocking_task is None + + assert patch_directory_watcher.call_count == 2 diff --git a/services/dynamic-sidecar/tests/unit/test_modules_mounted_fs.py b/services/dynamic-sidecar/tests/unit/test_modules_mounted_fs.py new file mode 100644 index 00000000000..d63608f54ad --- /dev/null +++ b/services/dynamic-sidecar/tests/unit/test_modules_mounted_fs.py @@ -0,0 +1,102 @@ +# pylint: disable=redefined-outer-name +# pylint: 
disable=unused-argument +# pylint: disable=protected-access + +import os +from pathlib import Path +from typing import Any, Iterator, List + +import pytest +from simcore_service_dynamic_sidecar.modules import mounted_fs + +# UTILS + + +def _replace_slashes(path: Path) -> str: + return str(path).replace(os.sep, "_") + + +def _assert_same_object(first: Any, second: Any) -> None: + assert first == second + assert id(first) == id(second) + + +# FIXTURES + + +@pytest.fixture +def clear_mounted_volumes() -> Iterator[None]: + mounted_fs._mounted_volumes = None + yield + mounted_fs._mounted_volumes = None + + +@pytest.fixture +def mounted_volumes(clear_mounted_volumes: None) -> mounted_fs.MountedVolumes: + assert mounted_fs._mounted_volumes is None + mounted_volumes: mounted_fs.MountedVolumes = mounted_fs.setup_mounted_fs() + _assert_same_object(mounted_volumes, mounted_fs.get_mounted_volumes()) + return mounted_volumes + + +@pytest.fixture +def path_to_transform() -> Path: + return Path("/some/path/to/transform") + + +# TESTS + + +def test_name_from_full_path(path_to_transform: Path) -> None: + assert mounted_fs._name_from_full_path( # pylint: disable=protected-access + path_to_transform + ) == _replace_slashes(path_to_transform) + + +def test_setup_ok(mounted_volumes: mounted_fs.MountedVolumes) -> None: + assert mounted_volumes + + +def test_expected_paths_and_volumes( + mounted_volumes: mounted_fs.MountedVolumes, + state_paths_dirs: List[Path], + compose_namespace: str, +) -> None: + assert ( + len(set(mounted_volumes.volume_name_state_paths())) + == len(set(mounted_volumes.get_state_paths_docker_volumes())) + == len(set(mounted_volumes.disk_state_paths())) + ) + + # check location on disk + assert mounted_volumes.disk_outputs_path == mounted_fs.DY_VOLUMES / "outputs" + assert mounted_volumes.disk_inputs_path == mounted_fs.DY_VOLUMES / "inputs" + + assert set(mounted_volumes.disk_state_paths()) == { + mounted_fs.DY_VOLUMES / _replace_slashes(x).strip("_") for x in 
state_paths_dirs + } + + # check volume mount point + assert mounted_volumes.volume_name_outputs == f"{compose_namespace}_outputs" + assert mounted_volumes.volume_name_inputs == f"{compose_namespace}_inputs" + + assert set(mounted_volumes.volume_name_state_paths()) == { + f"{compose_namespace}{_replace_slashes(x)}" for x in state_paths_dirs + } + + # check docker_volume + assert ( + mounted_volumes.get_inputs_docker_volume() + == f"{mounted_volumes.volume_name_inputs}:{mounted_volumes.inputs_path}" + ) + assert ( + mounted_volumes.get_outputs_docker_volume() + == f"{mounted_volumes.volume_name_outputs}:{mounted_volumes.outputs_path}" + ) + + assert set(mounted_volumes.get_state_paths_docker_volumes()) == { + f"{volume_state_path}:{state_path}" + for volume_state_path, state_path in zip( + mounted_volumes.volume_name_state_paths(), state_paths_dirs + ) + } diff --git a/services/sidecar/Dockerfile b/services/sidecar/Dockerfile index 5f681fb295c..262f646b236 100644 --- a/services/sidecar/Dockerfile +++ b/services/sidecar/Dockerfile @@ -10,10 +10,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=mguidon -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/storage/Dockerfile b/services/storage/Dockerfile index c68e477cfae..7b538a54684 100644 --- a/services/storage/Dockerfile +++ b/services/storage/Dockerfile @@ -10,10 +10,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=mguidon -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/web/Dockerfile b/services/web/Dockerfile index 
704702c05a3..0ce5876bfe2 100644 --- a/services/web/Dockerfile +++ b/services/web/Dockerfile @@ -12,10 +12,10 @@ FROM python:${PYTHON_VERSION}-slim-buster as base LABEL maintainer=pcrespov -RUN set -eux; \ - apt-get update; \ - apt-get install -y gosu; \ - rm -rf /var/lib/apt/lists/*; \ +RUN set -eux && \ + apt-get update && \ + apt-get install -y gosu && \ + rm -rf /var/lib/apt/lists/* && \ # verify that the binary works gosu nobody true diff --git a/services/web/client/source/class/osparc/data/model/Node.js b/services/web/client/source/class/osparc/data/model/Node.js index 8951d27cee0..7a02f37ec92 100644 --- a/services/web/client/source/class/osparc/data/model/Node.js +++ b/services/web/client/source/class/osparc/data/model/Node.js @@ -126,6 +126,12 @@ qx.Class.define("osparc.data.model.Node", { nullable: true }, + dynamicV2: { + check: "Boolean", + init: false, + nullable: true + }, + serviceUrl: { check: "String", nullable: true, @@ -942,14 +948,14 @@ qx.Class.define("osparc.data.model.Node", { } const srvUrl = this.getServiceUrl(); if (srvUrl) { - let urlUpdate = srvUrl + "/retrieve"; - urlUpdate = urlUpdate.replace("//retrieve", "/retrieve"); + const urlRetrieve = this.isDynamicV2() ? osparc.utils.Utils.computeServiceV2RetrieveUrl(this.getStudy().getUuid(), this.getNodeId()) : osparc.utils.Utils.computeServiceRetrieveUrl(srvUrl); const updReq = new qx.io.request.Xhr(); const reqData = { "port_keys": portKey ? [portKey] : [] }; + updReq.setRequestHeader("Content-Type", "application/json"); updReq.set({ - url: urlUpdate, + url: urlRetrieve, method: "POST", requestData: qx.util.Serializer.toJson(reqData) }); @@ -1055,20 +1061,13 @@ qx.Class.define("osparc.data.model.Node", { return; } - const isDynamicType = data["boot_type"] === "V2" || false; - if (isDynamicType) { - // dynamic service - const srvUrl = window.location.protocol + "//" + nodeId + ".services." 
+ window.location.host; + const { + srvUrl, + isDynamicV2 + } = osparc.utils.Utils.computeServiceUrl(data); + this.setDynamicV2(isDynamicV2); + if (srvUrl) { this.__waitForServiceReady(srvUrl); - } else { - // old implementation - const servicePath = data["service_basepath"]; - const entryPointD = data["entry_point"]; - if (servicePath) { - const entryPoint = entryPointD ? ("/" + entryPointD) : "/"; - const srvUrl = servicePath + entryPoint; - this.__waitForServiceReady(srvUrl); - } } break; } diff --git a/services/web/client/source/class/osparc/utils/Utils.js b/services/web/client/source/class/osparc/utils/Utils.js index 63264a17e91..1893f9fe991 100644 --- a/services/web/client/source/class/osparc/utils/Utils.js +++ b/services/web/client/source/class/osparc/utils/Utils.js @@ -30,6 +30,40 @@ qx.Class.define("osparc.utils.Utils", { type: "static", statics: { + computeServiceUrl: function(resp) { + const data = { + srvUrl: null, + isDynamicV2: null + }; + const isDynamicV2 = resp["boot_type"] === "V2" || false; + data["isDynamicV2"] = isDynamicV2; + if (isDynamicV2) { + // dynamic service + const srvUrl = window.location.protocol + "//" + resp["service_uuid"] + ".services." + window.location.host; + data["srvUrl"] = srvUrl; + } else { + // old implementation + const servicePath = resp["service_basepath"]; + const entryPointD = resp["entry_point"]; + if (servicePath) { + const entryPoint = entryPointD ? 
("/" + entryPointD) : "/"; + const srvUrl = servicePath + entryPoint; + data["srvUrl"] = srvUrl; + } + } + return data; + }, + + computeServiceRetrieveUrl: function(srvUrl) { + const urlRetrieve = srvUrl + "/retrieve"; + return urlRetrieve.replace("//retrieve", "/retrieve"); + }, + + computeServiceV2RetrieveUrl: function(studyId, nodeId) { + const urlBase = window.location.protocol + "//" + window.location.host + "/v0"; + return urlBase + "/projects/" + studyId + "/nodes/" + nodeId + ":retrieve"; + }, + setZoom: function(el, zoom) { const transformOrigin = [0, 0]; const p = ["webkit", "moz", "ms", "o"]; diff --git a/services/web/client/source/class/osparc/viewer/NodeViewer.js b/services/web/client/source/class/osparc/viewer/NodeViewer.js index 5411dbe3f01..be3161b314c 100644 --- a/services/web/client/source/class/osparc/viewer/NodeViewer.js +++ b/services/web/client/source/class/osparc/viewer/NodeViewer.js @@ -61,6 +61,17 @@ qx.Class.define("osparc.viewer.NodeViewer", { nodeId: { check: "String", nullable: false + }, + + serviceUrl: { + check: "String", + nullable: true + }, + + dynamicV2: { + check: "Boolean", + init: false, + nullable: true } }, @@ -131,20 +142,13 @@ qx.Class.define("osparc.viewer.NodeViewer", { return; } - const isDynamicType = data["boot_type"] === "V2" || false; - if (isDynamicType) { - // dynamic service - const srvUrl = window.location.protocol + "//" + nodeId + ".services." + window.location.host; + const { + srvUrl, + isDynamicV2 + } = osparc.utils.Utils.computeServiceUrl(data); + this.setDynamicV2(isDynamicV2); + if (srvUrl) { this.__waitForServiceReady(srvUrl); - } else { - // old implementation - const servicePath = data["service_basepath"]; - const entryPointD = data["entry_point"]; - if (servicePath) { - const entryPoint = entryPointD ? 
("/" + entryPointD) : "/"; - const srvUrl = servicePath + entryPoint; - this.__waitForServiceReady(srvUrl); - } } break; } @@ -165,15 +169,30 @@ qx.Class.define("osparc.viewer.NodeViewer", { // ping for some time until it is really ready const pingRequest = new qx.io.request.Xhr(srvUrl); pingRequest.addListenerOnce("success", () => { - // retrieveInputs - let urlUpdate = srvUrl + "/retrieve"; - urlUpdate = urlUpdate.replace("//retrieve", "/retrieve"); + this.__serviceReadyIn(srvUrl); + }, this); + pingRequest.addListenerOnce("fail", () => { + const interval = 2000; + qx.event.Timer.once(() => this.__waitForServiceReady(srvUrl), this, interval); + }); + pingRequest.send(); + }, + + __serviceReadyIn: function(srvUrl) { + this.setServiceUrl(srvUrl); + this.__retrieveInputs(); + }, + + __retrieveInputs: function() { + const srvUrl = this.getServiceUrl(); + if (srvUrl) { + const urlRetrieve = this.isDynamicV2() ? osparc.utils.Utils.computeServiceV2RetrieveUrl(this.getStudyId(), this.getNodeId()) : osparc.utils.Utils.computeServiceRetrieveUrl(srvUrl); const updReq = new qx.io.request.Xhr(); const reqData = { "port_keys": [] }; updReq.set({ - url: urlUpdate, + url: urlRetrieve, method: "POST", requestData: qx.util.Serializer.toJson(reqData) }); @@ -182,12 +201,7 @@ qx.Class.define("osparc.viewer.NodeViewer", { this.__iFrameChanged(); }, this); updReq.send(); - }, this); - pingRequest.addListenerOnce("fail", () => { - const interval = 2000; - qx.event.Timer.once(() => this.__waitForServiceReady(srvUrl), this, interval); - }); - pingRequest.send(); + } }, __iFrameChanged: function() { diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 0bc97aeaff6..57d1c75bb1e 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -11874,6 +11874,133 @@ paths: message: Password 
is not secure field: pasword status: 400 + '/projects/{project_id}/nodes/{node_id}:retrieve': + parameters: + - name: project_id + in: path + required: true + schema: + type: string + - name: node_id + in: path + required: true + schema: + type: string + post: + tags: + - project + description: Triggers service retrieve + operationId: post_retrieve + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + port_keys: + description: list of por keys to be retrieved + type: array + items: + type: string + responses: + '200': + description: Returns the amount of transferred bytes when pulling data via nodeports + content: + application/json: + schema: + type: object + properties: + data: + type: object + description: response payload + properties: + size_bytes: + type: integer + description: amount of transferred bytes + default: + description: Default http error response body + content: + application/json: + schema: + type: object + required: + - error + properties: + data: + nullable: true + default: null + error: + type: object + nullable: true + properties: + logs: + description: log messages + type: array + items: + type: object + properties: + level: + description: log level + type: string + default: INFO + enum: + - DEBUG + - WARNING + - INFO + - ERROR + message: + description: 'log message. 
If logger is USER, then it MUST be human readable' + type: string + logger: + description: name of the logger receiving this message + type: string + required: + - message + example: + message: 'Hi there, Mr user' + level: INFO + logger: user-logger + errors: + description: errors metadata + type: array + items: + type: object + required: + - code + - message + properties: + code: + type: string + description: Typically the name of the exception that produced it otherwise some known error code + message: + type: string + description: Error message specific to this item + resource: + type: string + description: API resource affected by this error + field: + type: string + description: Specific field within the resource + status: + description: HTTP error code + type: integer + example: + BadRequestError: + logs: + - message: Requested information is incomplete or malformed + level: ERROR + - message: Invalid email and password + level: ERROR + logger: USER + errors: + - code: InvalidEmail + message: Email is malformed + field: email + - code: UnsavePassword + message: Password is not secure + field: pasword + status: 400 '/nodes/{nodeInstanceUUID}/outputUi/{outputKey}': get: tags: diff --git a/services/web/server/src/simcore_service_webserver/director_v2_api.py b/services/web/server/src/simcore_service_webserver/director_v2_api.py index 12c9049322f..560374f33df 100644 --- a/services/web/server/src/simcore_service_webserver/director_v2_api.py +++ b/services/web/server/src/simcore_service_webserver/director_v2_api.py @@ -9,6 +9,7 @@ get_services, is_healthy, request_retrieve_dyn_service, + retrieve, start_service, stop_service, stop_services, @@ -24,6 +25,7 @@ "get_services", "is_healthy", "request_retrieve_dyn_service", + "retrieve", "start_service", "stop_service", "stop_services", diff --git a/services/web/server/src/simcore_service_webserver/director_v2_core.py b/services/web/server/src/simcore_service_webserver/director_v2_core.py index 9c93cd64702..452cdfaaa59 
100644 --- a/services/web/server/src/simcore_service_webserver/director_v2_core.py +++ b/services/web/server/src/simcore_service_webserver/director_v2_core.py @@ -11,6 +11,7 @@ from pydantic.types import PositiveInt from servicelib.logging_utils import log_decorator from servicelib.utils import logged_gather +from tenacity import AsyncRetrying, stop_after_attempt, wait_exponential from yarl import URL from .director_v2_settings import Directorv2Settings, get_client_session, get_settings @@ -63,11 +64,11 @@ async def _request_director_v2( ) # NOTE: - # sometimes director-v0 (via redirects) - # replies in plain text and this is considered an error - # - if response.status != expected_status.status_code or isinstance( - payload, str + # - sometimes `director-v0` (via redirects) replies + # in plain text and this is considered an error + # - `director-v2` and `director-v0` can reply with 204 no content + if response.status != expected_status.status_code or ( + response.status != web.HTTPNoContent.status_code and isinstance(payload, str) ): raise DirectorServiceError(response.status, reason=str(payload)) @@ -319,13 +320,51 @@ async def stop_services( await logged_gather(*services_to_stop) +def _retry_parameters() -> Dict[str, Any]: + return dict(stop=stop_after_attempt(3), wait=wait_exponential(), reraise=True) + + @log_decorator(logger=log) async def get_service_state(app: web.Application, node_uuid: str) -> DataType: settings: Directorv2Settings = get_settings(app) backend_url = URL(settings.endpoint) / "dynamic_services" / f"{node_uuid}" + + # sometimes the director-v2 cannot be reached causing the service to fail + # retrying 3 times before giving up for good + async for attempt in AsyncRetrying(**_retry_parameters()): + with attempt: + service_state = await _request_director_v2( + app, "GET", backend_url, expected_status=web.HTTPOk + ) assert isinstance(service_state, dict)
# nosec return service_state + + +@log_decorator(logger=log) +async def retrieve( + app: web.Application, node_uuid: str, port_keys: List[str] +) -> DataBody: + # when triggering retrieve endpoint + # this will allow to sava bigger datasets from the services + timeout = ServicesCommonSettings().storage_service_upload_download_timeout + + director2_settings: Directorv2Settings = get_settings(app) + backend_url = ( + URL(director2_settings.endpoint) / "dynamic_services" / f"{node_uuid}:retrieve" + ) + body = dict(port_keys=port_keys) + + async for attempt in AsyncRetrying(**_retry_parameters()): + with attempt: + retry_result = await _request_director_v2( + app, + "POST", + backend_url, + expected_status=web.HTTPOk, + data=body, + timeout=timeout, + ) + + assert isinstance(retry_result, dict) # nosec + return retry_result diff --git a/services/web/server/src/simcore_service_webserver/projects/projects_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/projects_nodes_handlers.py index 2b7fee8d46e..dbf1067ffa7 100644 --- a/services/web/server/src/simcore_service_webserver/projects/projects_nodes_handlers.py +++ b/services/web/server/src/simcore_service_webserver/projects/projects_nodes_handlers.py @@ -98,6 +98,21 @@ async def get_node(request: web.Request) -> web.Response: @routes.delete(f"/{VTAG}/projects/{{project_uuid}}/nodes/{{node_uuid}}") +@login_required +@permission_required("project.node.read") +async def post_retrieve(request: web.Request) -> web.Response: + try: + node_uuid = request.match_info["node_id"] + data = await request.json() + port_keys = data.get("port_keys", []) + except KeyError as err: + raise web.HTTPBadRequest(reason=f"Invalid request parameter {err}") from err + + return web.json_response( + await director_v2_api.retrieve(request.app, node_uuid, port_keys) + ) + + @login_required @permission_required("project.node.delete") async def delete_node(request: web.Request) -> web.Response: