Skip to content

Commit 38109f8

Browse files
GitHKAndrei Neagu
and
Andrei Neagu
authored
♻️ reroute user services restart via dynamic-scheduler (#6943)
Co-authored-by: Andrei Neagu <[email protected]>
1 parent acd677a commit 38109f8

File tree

12 files changed

+85
-33
lines changed

12 files changed

+85
-33
lines changed

packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py

+16
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,22 @@ async def stop_dynamic_service(
9999
assert result is None # nosec
100100

101101

102+
@log_decorator(_logger, level=logging.DEBUG)
103+
async def restart_user_services(
104+
rabbitmq_rpc_client: RabbitMQRPCClient,
105+
*,
106+
node_id: NodeID,
107+
timeout_s: NonNegativeInt,
108+
) -> None:
109+
result = await rabbitmq_rpc_client.request(
110+
DYNAMIC_SCHEDULER_RPC_NAMESPACE,
111+
_RPC_METHOD_NAME_ADAPTER.validate_python("restart_user_services"),
112+
node_id=node_id,
113+
timeout_s=timeout_s,
114+
)
115+
assert result is None # nosec
116+
117+
102118
@log_decorator(_logger, level=logging.DEBUG)
103119
async def retrieve_inputs(
104120
rabbitmq_rpc_client: RabbitMQRPCClient,

services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py

+5
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ async def stop_dynamic_service(
6262
)
6363

6464

65+
@router.expose()
66+
async def restart_user_services(app: FastAPI, *, node_id: NodeID) -> None:
67+
await scheduler_interface.restart_user_services(app, node_id=node_id)
68+
69+
6570
@router.expose()
6671
async def retrieve_inputs(
6772
app: FastAPI, *, node_id: NodeID, port_keys: list[ServicePortKey]

services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_public_client.py

+3
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ async def list_tracked_dynamic_services(
125125
)
126126
return TypeAdapter(list[DynamicServiceGet]).validate_python(response.json())
127127

128+
async def restart_user_services(self, *, node_id: NodeID) -> None:
129+
await self.thin_client.post_restart(node_id=node_id)
130+
128131
async def update_projects_networks(self, *, project_id: ProjectID) -> None:
129132
await self.thin_client.patch_projects_networks(project_id=project_id)
130133

services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_thin_client.py

+5
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ async def get_dynamic_services(
142142
params=as_dict_exclude_unset(user_id=user_id, project_id=project_id),
143143
)
144144

145+
@retry_on_errors()
146+
@expect_status(status.HTTP_204_NO_CONTENT)
147+
async def post_restart(self, *, node_id: NodeID) -> Response:
148+
return await self.client.post(f"/dynamic_services/{node_id}:restart")
149+
145150
@retry_on_errors()
146151
@expect_status(status.HTTP_204_NO_CONTENT)
147152
async def patch_projects_networks(self, *, project_id: ProjectID) -> Response:

services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/scheduler_interface.py

+9
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ async def stop_dynamic_service(
7979
await set_request_as_stopped(app, dynamic_service_stop)
8080

8181

82+
async def restart_user_services(app: FastAPI, *, node_id: NodeID) -> None:
83+
settings: ApplicationSettings = app.state.settings
84+
if settings.DYNAMIC_SCHEDULER_USE_INTERNAL_SCHEDULER:
85+
raise NotImplementedError
86+
87+
director_v2_client = DirectorV2Client.get_from_app_state(app)
88+
await director_v2_client.restart_user_services(node_id=node_id)
89+
90+
8291
async def retrieve_inputs(
8392
app: FastAPI, *, node_id: NodeID, port_keys: list[ServicePortKey]
8493
) -> RetrieveDataOutEnveloped:

services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py

+22-3
Original file line numberDiff line numberDiff line change
@@ -494,15 +494,34 @@ async def test_stop_dynamic_service_serializes_generic_errors(
494494

495495

496496
@pytest.fixture
497-
def mock_director_v2_service_retrieve_inputs(node_id: NodeID) -> Iterator[None]:
497+
def mock_director_v2_restart_user_services(node_id: NodeID) -> Iterator[None]:
498498
with respx.mock(
499499
base_url="http://director-v2:8000/v2",
500500
assert_all_called=False,
501501
assert_all_mocked=True, # IMPORTANT: KEEP always True!
502502
) as mock:
503-
request_ok = mock.post(f"/dynamic_services/{node_id}:retrieve")
503+
mock.post(f"/dynamic_services/{node_id}:restart").respond(
504+
status.HTTP_204_NO_CONTENT
505+
)
506+
yield None
507+
508+
509+
async def test_restart_user_services(
510+
mock_director_v2_restart_user_services: None,
511+
rpc_client: RabbitMQRPCClient,
512+
node_id: NodeID,
513+
):
514+
await services.restart_user_services(rpc_client, node_id=node_id, timeout_s=5)
504515

505-
request_ok.respond(
516+
517+
@pytest.fixture
518+
def mock_director_v2_service_retrieve_inputs(node_id: NodeID) -> Iterator[None]:
519+
with respx.mock(
520+
base_url="http://director-v2:8000/v2",
521+
assert_all_called=False,
522+
assert_all_mocked=True, # IMPORTANT: KEEP always True!
523+
) as mock:
524+
mock.post(f"/dynamic_services/{node_id}:retrieve").respond(
506525
status.HTTP_200_OK,
507526
text=TypeAdapter(RetrieveDataOutEnveloped)
508527
.validate_python(

services/web/server/src/simcore_service_webserver/director_v2/_core_dynamic_services.py

-18
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,6 @@
1818
_log = logging.getLogger(__name__)
1919

2020

21-
@log_decorator(logger=_log)
22-
async def restart_dynamic_service(app: web.Application, node_uuid: str) -> None:
23-
"""User restart the dynamic dynamic service started in the node_uuid
24-
25-
NOTE that this operation will NOT restart all sidecar services
26-
(``simcore-service-dynamic-sidecar`` or ``reverse-proxy caddy`` services) but
27-
ONLY those containers in the compose-spec (i.e. the ones exposed to the user)
28-
"""
29-
settings: DirectorV2Settings = get_plugin_settings(app)
30-
await request_director_v2(
31-
app,
32-
"POST",
33-
url=settings.base_url / f"dynamic_services/{node_uuid}:restart",
34-
expected_status=web.HTTPOk,
35-
timeout=settings.DIRECTOR_V2_RESTART_DYNAMIC_SERVICE_TIMEOUT,
36-
)
37-
38-
3921
@log_decorator(logger=_log)
4022
async def get_project_inactivity(
4123
app: web.Application,

services/web/server/src/simcore_service_webserver/director_v2/api.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
is_pipeline_running,
1717
stop_pipeline,
1818
)
19-
from ._core_dynamic_services import get_project_inactivity, restart_dynamic_service
19+
from ._core_dynamic_services import get_project_inactivity
2020
from ._core_utils import is_healthy
2121
from .exceptions import DirectorServiceError
2222

@@ -32,7 +32,6 @@
3232
"get_project_run_policy",
3333
"is_healthy",
3434
"is_pipeline_running",
35-
"restart_dynamic_service",
3635
"set_project_run_policy",
3736
"stop_pipeline",
3837
)

services/web/server/src/simcore_service_webserver/director_v2/settings.py

-8
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,6 @@ def base_url(self) -> URL:
3333
# - Mostly in floats (aiohttp.Client/) but sometimes in ints
3434
# - Typically in seconds but occasionally in ms
3535

36-
DIRECTOR_V2_RESTART_DYNAMIC_SERVICE_TIMEOUT: PositiveInt = Field(
37-
1 * _MINUTE,
38-
description="timeout of containers restart",
39-
validation_alias=AliasChoices(
40-
"DIRECTOR_V2_RESTART_DYNAMIC_SERVICE_TIMEOUT",
41-
),
42-
)
43-
4436
DIRECTOR_V2_STORAGE_SERVICE_UPLOAD_DOWNLOAD_TIMEOUT: PositiveInt = Field(
4537
_HOUR,
4638
description=(

services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py

+17
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,23 @@ async def stop_dynamic_services_in_project(
154154
await logged_gather(*services_to_stop)
155155

156156

157+
async def restart_user_services(app: web.Application, *, node_id: NodeID) -> None:
158+
"""Restarts the user service(s) started by the the node_uuid's sidecar
159+
160+
NOTE: this operation will NOT restart
161+
sidecar services (``dy-sidecar`` or ``dy-proxy`` services),
162+
but ONLY user services (the ones defined by the compose spec).
163+
"""
164+
settings: DynamicSchedulerSettings = get_plugin_settings(app)
165+
await services.restart_user_services(
166+
get_rabbitmq_rpc_client(app),
167+
node_id=node_id,
168+
timeout_s=int(
169+
settings.DYNAMIC_SCHEDULER_RESTART_USER_SERVICES_TIMEOUT.total_seconds()
170+
),
171+
)
172+
173+
157174
async def retrieve_inputs(
158175
app: web.Application, node_id: NodeID, port_keys: list[ServicePortKey]
159176
) -> RetrieveDataOutEnveloped:

services/web/server/src/simcore_service_webserver/dynamic_scheduler/settings.py

+4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ class DynamicSchedulerSettings(BaseCustomSettings, MixinServiceSettings):
2626
),
2727
)
2828

29+
DYNAMIC_SCHEDULER_RESTART_USER_SERVICES_TIMEOUT: datetime.timedelta = Field(
30+
datetime.timedelta(minutes=1), description="timeout for user services restart"
31+
)
32+
2933
DYNAMIC_SCHEDULER_SERVICE_UPLOAD_DOWNLOAD_TIMEOUT: datetime.timedelta = Field(
3034
datetime.timedelta(hours=1),
3135
description=(

services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262

6363
from .._meta import API_VTAG as VTAG
6464
from ..catalog import client as catalog_client
65-
from ..director_v2 import api as director_v2_api
6665
from ..dynamic_scheduler import api as dynamic_scheduler_api
6766
from ..groups.api import get_group_from_gid, list_all_user_groups_ids
6867
from ..groups.exceptions import GroupNotFoundError
@@ -411,7 +410,9 @@ async def restart_node(request: web.Request) -> web.Response:
411410

412411
path_params = parse_request_path_parameters_as(NodePathParams, request)
413412

414-
await director_v2_api.restart_dynamic_service(request.app, f"{path_params.node_id}")
413+
await dynamic_scheduler_api.restart_user_services(
414+
request.app, node_id=path_params.node_id
415+
)
415416

416417
return web.json_response(status=status.HTTP_204_NO_CONTENT)
417418

0 commit comments

Comments
 (0)