Skip to content

Commit 532647b

Browse files
authored
Merge branch 'master' into i4529/api-server-upgrade-to-asyncpg
2 parents 2a965ca + 168a1d6 commit 532647b

File tree

23 files changed

+634
-379
lines changed

23 files changed

+634
-379
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import Final
2+
from uuid import uuid4
3+
4+
from models_library.projects import ProjectID
5+
from models_library.projects_nodes_io import NodeID
6+
from models_library.services_types import ServiceKey, ServiceVersion
7+
from models_library.users import UserID
8+
from pydantic import TypeAdapter
9+
10+
from ..models import DaskJobID
11+
12+
13+
def generate_dask_job_id(
    service_key: ServiceKey,
    service_version: ServiceVersion,
    user_id: UserID,
    project_id: ProjectID,
    node_id: NodeID,
) -> DaskJobID:
    """Create a unique dask job id embedding the user/project/node identifiers.

    The id is shown in the Dask scheduler dashboard website and can be
    decomposed again with ``parse_dask_job_id``. The trailing uuid makes
    every generated id unique.
    """
    id_parts = (
        f"{service_key}",
        f"{service_version}",
        f"userid_{user_id}",
        f"projectid_{project_id}",
        f"nodeid_{node_id}",
        f"uuid_{uuid4()}",
    )
    return DaskJobID(":".join(id_parts))
28+
29+
30+
# a serialized job id is made of exactly 6 ":"-separated parts:
# service_key, service_version, userid_*, projectid_*, nodeid_*, uuid_*
_JOB_ID_PARTS: Final[int] = 6
31+
32+
33+
def parse_dask_job_id(
    job_id: str,
) -> tuple[ServiceKey, ServiceVersion, UserID, ProjectID, NodeID]:
    """Decompose a job id created by ``generate_dask_job_id``.

    Returns (service_key, service_version, user_id, project_id, node_id);
    the trailing uniqueness uuid part is discarded.
    """
    parts = job_id.split(":")
    assert len(parts) == _JOB_ID_PARTS  # nosec
    key_part, version_part, user_part, project_part, node_part, _uuid_part = parts
    return (
        key_part,
        version_part,
        # each part carries a "<label>_" prefix that is stripped before validation
        TypeAdapter(UserID).validate_python(user_part[len("userid_") :]),
        ProjectID(project_part[len("projectid_") :]),
        NodeID(node_part[len("nodeid_") :]),
    )
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from typing import TypeAlias

# Identifier of a job submitted to the dask scheduler
DaskJobID: TypeAlias = str
# Maps a resource name to the numeric amount of that resource
DaskResources: TypeAlias = dict[str, int | float]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# pylint: disable=too-many-positional-arguments
2+
# pylint:disable=redefined-outer-name
3+
# pylint:disable=too-many-arguments
4+
# pylint:disable=unused-argument
5+
# pylint:disable=unused-variable
6+
7+
import pytest
8+
from dask_task_models_library.container_tasks.utils import (
9+
generate_dask_job_id,
10+
parse_dask_job_id,
11+
)
12+
from faker import Faker
13+
from models_library.projects import ProjectID
14+
from models_library.projects_nodes_io import NodeID
15+
from models_library.services_types import ServiceKey, ServiceVersion
16+
from models_library.users import UserID
17+
from pydantic import TypeAdapter
18+
19+
20+
@pytest.fixture(
    params=["simcore/service/comp/some/fake/service/key", "dockerhub-style/service_key"]
)
def service_key(request) -> ServiceKey:
    """Parametrized fixture yielding both osparc-style and dockerhub-style keys."""
    return request.param
25+
26+
27+
@pytest.fixture()
def service_version() -> ServiceVersion:
    """Fixed service version used across the serialization tests."""
    # annotated with the domain type for consistency with the other
    # fixtures in this module (ServiceVersion is already imported here)
    return "1234.32432.2344"
30+
31+
32+
@pytest.fixture
def user_id(faker: Faker) -> UserID:
    """Random positive user id validated through the UserID type."""
    random_positive_int = faker.pyint(min_value=1)
    return TypeAdapter(UserID).validate_python(random_positive_int)
35+
36+
37+
@pytest.fixture
def project_id(faker: Faker) -> ProjectID:
    """Random project id built from a faker-generated uuid."""
    random_uuid = faker.uuid4()
    return ProjectID(random_uuid)
40+
41+
42+
@pytest.fixture
def node_id(faker: Faker) -> NodeID:
    """Random node id built from a faker-generated uuid."""
    random_uuid = faker.uuid4()
    return NodeID(random_uuid)
45+
46+
47+
def test_dask_job_id_serialization(
    service_key: ServiceKey,
    service_version: ServiceVersion,
    user_id: UserID,
    project_id: ProjectID,
    node_id: NodeID,
):
    """Round-trip: parsing a generated job id recovers every encoded field."""
    job_id = generate_dask_job_id(
        service_key, service_version, user_id, project_id, node_id
    )
    assert parse_dask_job_id(job_id) == (
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    )

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from aws_library.ec2._errors import EC2TooManyInstancesError
1919
from fastapi import FastAPI
2020
from models_library.generated_models.docker_rest_api import Node, NodeState
21+
from models_library.rabbitmq_messages import ProgressType
2122
from servicelib.logging_utils import log_catch, log_context
2223
from servicelib.utils import limited_gather
2324
from servicelib.utils_formatting import timedelta_as_minute_second
@@ -51,7 +52,11 @@
5152
get_deactivated_buffer_ec2_tags,
5253
is_buffer_machine,
5354
)
54-
from ..utils.rabbitmq import post_autoscaling_status_message
55+
from ..utils.rabbitmq import (
56+
post_autoscaling_status_message,
57+
post_tasks_log_message,
58+
post_tasks_progress_message,
59+
)
5560
from .auto_scaling_mode_base import BaseAutoscaling
5661
from .docker import get_docker_client
5762
from .ec2 import get_ec2_client
@@ -354,7 +359,6 @@ def _as_selection(instance_type: EC2InstanceType) -> int:
354359

355360
async def _activate_and_notify(
356361
app: FastAPI,
357-
auto_scaling_mode: BaseAutoscaling,
358362
drained_node: AssociatedInstance,
359363
) -> AssociatedInstance:
360364
app_settings = get_application_settings(app)
@@ -363,14 +367,17 @@ async def _activate_and_notify(
363367
utils_docker.set_node_osparc_ready(
364368
app_settings, docker_client, drained_node.node, ready=True
365369
),
366-
auto_scaling_mode.log_message_from_tasks(
370+
post_tasks_log_message(
367371
app,
368-
drained_node.assigned_tasks,
369-
"cluster adjusted, service should start shortly...",
372+
tasks=drained_node.assigned_tasks,
373+
message="cluster adjusted, service should start shortly...",
370374
level=logging.INFO,
371375
),
372-
auto_scaling_mode.progress_message_from_tasks(
373-
app, drained_node.assigned_tasks, progress=1.0
376+
post_tasks_progress_message(
377+
app,
378+
tasks=drained_node.assigned_tasks,
379+
progress=1.0,
380+
progress_type=ProgressType.CLUSTER_UP_SCALING,
374381
),
375382
)
376383
return dataclasses.replace(drained_node, node=updated_node)
@@ -379,7 +386,6 @@ async def _activate_and_notify(
379386
async def _activate_drained_nodes(
380387
app: FastAPI,
381388
cluster: Cluster,
382-
auto_scaling_mode: BaseAutoscaling,
383389
) -> Cluster:
384390
nodes_to_activate = [
385391
node
@@ -396,10 +402,7 @@ async def _activate_drained_nodes(
396402
f"activate {len(nodes_to_activate)} drained nodes {[n.ec2_instance.id for n in nodes_to_activate]}",
397403
):
398404
activated_nodes = await asyncio.gather(
399-
*(
400-
_activate_and_notify(app, auto_scaling_mode, node)
401-
for node in nodes_to_activate
402-
)
405+
*(_activate_and_notify(app, node) for node in nodes_to_activate)
403406
)
404407
new_active_node_ids = {node.ec2_instance.id for node in activated_nodes}
405408
remaining_drained_nodes = [
@@ -787,10 +790,10 @@ async def _launch_instances(
787790
app, needed_instances, new_instance_tags
788791
)
789792
except EC2TooManyInstancesError:
790-
await auto_scaling_mode.log_message_from_tasks(
793+
await post_tasks_log_message(
791794
app,
792-
tasks,
793-
"The maximum number of machines in the cluster was reached. Please wait for your running jobs "
795+
tasks=tasks,
796+
message="The maximum number of machines in the cluster was reached. Please wait for your running jobs "
794797
"to complete and try again later or contact osparc support if this issue does not resolve.",
795798
level=logging.ERROR,
796799
)
@@ -829,10 +832,10 @@ async def _launch_instances(
829832
new_pending_instances: list[EC2InstanceData] = []
830833
for r in results:
831834
if isinstance(r, EC2TooManyInstancesError):
832-
await auto_scaling_mode.log_message_from_tasks(
835+
await post_tasks_log_message(
833836
app,
834-
tasks,
835-
"Exceptionally high load on computational cluster, please try again later.",
837+
tasks=tasks,
838+
message="Exceptionally high load on computational cluster, please try again later.",
836839
level=logging.ERROR,
837840
)
838841
elif isinstance(r, BaseException):
@@ -847,14 +850,14 @@ async def _launch_instances(
847850
f"{sum(n for n in capped_needed_machines.values())} new machines launched"
848851
", it might take up to 3 minutes to start, Please wait..."
849852
)
850-
await auto_scaling_mode.log_message_from_tasks(
851-
app, tasks, log_message, level=logging.INFO
853+
await post_tasks_log_message(
854+
app, tasks=tasks, message=log_message, level=logging.INFO
852855
)
853856
if last_issue:
854-
await auto_scaling_mode.log_message_from_tasks(
857+
await post_tasks_log_message(
855858
app,
856-
tasks,
857-
"Unexpected issues detected, probably due to high load, please contact support",
859+
tasks=tasks,
860+
message="Unexpected issues detected, probably due to high load, please contact support",
858861
level=logging.ERROR,
859862
)
860863

@@ -1064,7 +1067,6 @@ async def _try_scale_down_cluster(app: FastAPI, cluster: Cluster) -> Cluster:
10641067
async def _notify_based_on_machine_type(
10651068
app: FastAPI,
10661069
instances: list[AssociatedInstance] | list[NonAssociatedInstance],
1067-
auto_scaling_mode: BaseAutoscaling,
10681070
*,
10691071
message: str,
10701072
) -> None:
@@ -1088,24 +1090,22 @@ async def _notify_based_on_machine_type(
10881090
f" est. remaining time: {timedelta_as_minute_second(estimated_time_to_completion)})...please wait..."
10891091
)
10901092
if tasks:
1091-
await auto_scaling_mode.log_message_from_tasks(
1092-
app, tasks, message=msg, level=logging.INFO
1093+
await post_tasks_log_message(
1094+
app, tasks=tasks, message=msg, level=logging.INFO
10931095
)
1094-
await auto_scaling_mode.progress_message_from_tasks(
1096+
await post_tasks_progress_message(
10951097
app,
1096-
tasks,
1098+
tasks=tasks,
10971099
progress=time_since_launch.total_seconds()
10981100
/ instance_max_time_to_start.total_seconds(),
1101+
progress_type=ProgressType.CLUSTER_UP_SCALING,
10991102
)
11001103

11011104

1102-
async def _notify_machine_creation_progress(
1103-
app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling
1104-
) -> None:
1105+
async def _notify_machine_creation_progress(app: FastAPI, cluster: Cluster) -> None:
11051106
await _notify_based_on_machine_type(
11061107
app,
11071108
cluster.pending_ec2s,
1108-
auto_scaling_mode,
11091109
message="waiting for machine to join cluster",
11101110
)
11111111

@@ -1191,10 +1191,10 @@ async def _scale_up_cluster(
11911191
if needed_ec2_instances := await _find_needed_instances(
11921192
app, unassigned_tasks, allowed_instance_types, cluster, auto_scaling_mode
11931193
):
1194-
await auto_scaling_mode.log_message_from_tasks(
1194+
await post_tasks_log_message(
11951195
app,
1196-
unassigned_tasks,
1197-
"service is pending due to missing resources, scaling up cluster now...",
1196+
tasks=unassigned_tasks,
1197+
message="service is pending due to missing resources, scaling up cluster now...",
11981198
level=logging.INFO,
11991199
)
12001200
new_pending_instances = await _launch_instances(
@@ -1228,7 +1228,7 @@ async def _autoscale_cluster(
12281228
)
12291229

12301230
# 2. activate available drained nodes to cover some of the tasks
1231-
cluster = await _activate_drained_nodes(app, cluster, auto_scaling_mode)
1231+
cluster = await _activate_drained_nodes(app, cluster)
12321232

12331233
# 3. start warm buffer instances to cover the remaining tasks
12341234
cluster = await _start_warm_buffer_instances(app, cluster, auto_scaling_mode)
@@ -1301,5 +1301,5 @@ async def auto_scale_cluster(
13011301
)
13021302

13031303
# notify
1304-
await _notify_machine_creation_progress(app, cluster, auto_scaling_mode)
1304+
await _notify_machine_creation_progress(app, cluster)
13051305
await _notify_autoscaling_status(app, cluster, auto_scaling_mode)

0 commit comments

Comments
 (0)