Skip to content

Commit 59767b4

Browse files
authored
✨ Computational backend: connect to resource tracking via RabbitMQ (🗃️, ⚠️) (#4570)
1 parent 74636e0 commit 59767b4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+1701
-379
lines changed

.github/CODEOWNERS

Lines changed: 3 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -6,7 +6,6 @@
66
# files and folders recursively
77
.codeclimate.yml @sanderegg @pcrespov
88
.env-* @mrnicegyu11 @YuryHrytsuk
9-
.travis.yml @sanderegg
109
Makefile @pcrespov @sanderegg
1110

1211

@@ -21,10 +20,9 @@ Makefile @pcrespov @sanderegg
2120
/packages/service-library/ @pcrespov
2221
/packages/settings-library/ @pcrespov @sanderegg
2322
/requirements/ @pcrespov @matusdrobuliak66
24-
/scripts/json-schema-to-openapi-schema @sanderegg
2523
/services/agent/ @GitHK
2624
/services/api-server/ @pcrespov
27-
/services/autoscaling/ @sanderegg @pcrespov
25+
/services/autoscaling/ @sanderegg
2826
/services/catalog/ @pcrespov @sanderegg
2927
/services/clusters-keeper/ @sanderegg
3028
/services/datcore-adapter/ @sanderegg
@@ -37,10 +35,10 @@ Makefile @pcrespov @sanderegg
3735
/services/resource-usage-tracker/ @matusdrobuliak66
3836
/services/static-webserver/ @GitHK
3937
/services/static-webserver/client/ @odeimaiz
40-
/services/storage/ @sanderegg @pcrespov
38+
/services/storage/ @sanderegg
4139
/services/web/server/ @pcrespov @sanderegg @GitHK @matusdrobuliak66
4240
/tests/environment-setup/ @pcrespov
43-
/tests/performance/ @pcrespov
41+
/tests/performance/ @pcrespov @sanderegg
4442
/tests/public-api/ @pcrespov
4543
requirements/* @pcrespov
4644
tools/* @pcrespov

packages/models-library/src/models_library/docker.py

Lines changed: 4 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -25,6 +25,10 @@ class DockerLabelKey(ConstrainedStr):
2525
# good practice: use reverse DNS notation
2626
regex: re.Pattern[str] | None = DOCKER_LABEL_KEY_REGEX
2727

28+
@classmethod
29+
def from_key(cls, key: str) -> "DockerLabelKey":
30+
return cls(key.lower().replace("_", "-"))
31+
2832

2933
class DockerGenericTag(ConstrainedStr):
3034
# NOTE: https://docs.docker.com/engine/reference/commandline/tag/#description

packages/models-library/src/models_library/projects_state.py

Lines changed: 0 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -23,7 +23,6 @@ class RunningState(str, Enum):
2323
PENDING = "PENDING"
2424
WAITING_FOR_RESOURCES = "WAITING_FOR_RESOURCES"
2525
STARTED = "STARTED"
26-
RETRY = "RETRY"
2726
SUCCESS = "SUCCESS"
2827
FAILED = "FAILED"
2928
ABORTED = "ABORTED"
@@ -34,7 +33,6 @@ def is_running(self) -> bool:
3433
RunningState.PENDING,
3534
RunningState.WAITING_FOR_RESOURCES,
3635
RunningState.STARTED,
37-
RunningState.RETRY,
3836
)
3937

4038

packages/models-library/src/models_library/rabbitmq_messages.py

Lines changed: 11 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -174,7 +174,10 @@ class RabbitResourceTrackingBaseMessage(RabbitMessageBase):
174174
service_run_id: str = Field(
175175
..., description="uniquely identitifies the service run"
176176
)
177-
created_at: datetime.datetime = Field(..., description="message creation datetime")
177+
created_at: datetime.datetime = Field(
178+
default_factory=lambda: datetime.datetime.now(datetime.timezone.utc),
179+
description="message creation datetime",
180+
)
178181

179182
def routing_key(self) -> str | None:
180183
return None
@@ -219,3 +222,10 @@ class RabbitResourceTrackingStoppedMessage(RabbitResourceTrackingBaseMessage):
219222
...,
220223
description=f"{SimcorePlatformStatus.BAD} if simcore failed to run the service properly",
221224
)
225+
226+
227+
RabbitResourceTrackingMessages = (
228+
RabbitResourceTrackingStartedMessage
229+
| RabbitResourceTrackingStoppedMessage
230+
| RabbitResourceTrackingHeartbeatMessage
231+
)
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,76 @@
1+
"""add heartbeat timestamps
2+
3+
Revision ID: 6da4357ce10f
4+
Revises: 9b33ef4c690a
5+
Create Date: 2023-08-07 06:31:14.681513+00:00
6+
7+
"""
8+
import sqlalchemy as sa
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision = "6da4357ce10f"
13+
down_revision = "9b33ef4c690a"
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
modified_timestamp_trigger = sa.DDL(
19+
"""
20+
DROP TRIGGER IF EXISTS trigger_auto_update on comp_tasks;
21+
CREATE TRIGGER trigger_auto_update
22+
BEFORE INSERT OR UPDATE ON comp_tasks
23+
FOR EACH ROW EXECUTE PROCEDURE comp_tasks_auto_update_modified();
24+
"""
25+
)
26+
27+
update_modified_timestamp_procedure = sa.DDL(
28+
"""
29+
CREATE OR REPLACE FUNCTION comp_tasks_auto_update_modified()
30+
RETURNS TRIGGER AS $$
31+
BEGIN
32+
NEW.modified := current_timestamp;
33+
RETURN NEW;
34+
END;
35+
$$ LANGUAGE plpgsql;
36+
"""
37+
)
38+
39+
40+
def upgrade():
41+
# ### commands auto generated by Alembic - please adjust! ###
42+
op.add_column(
43+
"comp_tasks",
44+
sa.Column("last_heartbeat", sa.DateTime(timezone=True), nullable=True),
45+
)
46+
op.add_column(
47+
"comp_tasks",
48+
sa.Column(
49+
"created",
50+
sa.DateTime(timezone=True),
51+
server_default=sa.text("now()"),
52+
nullable=False,
53+
),
54+
)
55+
op.add_column(
56+
"comp_tasks",
57+
sa.Column(
58+
"modified",
59+
sa.DateTime(timezone=True),
60+
server_default=sa.text("now()"),
61+
nullable=False,
62+
),
63+
)
64+
# ### end Alembic commands ###
65+
op.execute(update_modified_timestamp_procedure)
66+
op.execute(modified_timestamp_trigger)
67+
68+
69+
def downgrade():
70+
op.execute(sa.DDL("DROP TRIGGER IF EXISTS trigger_auto_update on comp_tasks;"))
71+
op.execute(sa.DDL("DROP FUNCTION comp_tasks_auto_update_modified();"))
72+
# ### commands auto generated by Alembic - please adjust! ###
73+
op.drop_column("comp_tasks", "modified")
74+
op.drop_column("comp_tasks", "created")
75+
op.drop_column("comp_tasks", "last_heartbeat")
76+
# ### end Alembic commands ###

packages/postgres-database/src/simcore_postgres_database/migration/versions/afc752d10a6c_add_waiting_for_resources_enum_field.py

Lines changed: 4 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -22,13 +22,15 @@ def upgrade():
2222

2323
conn = op.get_bind()
2424
result = conn.execute(
25-
f"SELECT * FROM pg_enum WHERE enumtypid = (SELECT oid FROM pg_type WHERE typname = '{enum_type_name}') AND enumlabel = '{new_value}'"
25+
sa.DDL(
26+
f"SELECT * FROM pg_enum WHERE enumtypid = (SELECT oid FROM pg_type WHERE typname = '{enum_type_name}') AND enumlabel = '{new_value}'"
27+
)
2628
)
2729
value_exists = result.fetchone() is not None
2830

2931
if not value_exists:
3032
# Step 1: Use ALTER TYPE to add the new value to the existing enum
31-
op.execute(f"ALTER TYPE {enum_type_name} ADD VALUE '{new_value}'")
33+
op.execute(sa.DDL(f"ALTER TYPE {enum_type_name} ADD VALUE '{new_value}'"))
3234

3335

3436
def downgrade():

packages/postgres-database/src/simcore_postgres_database/models/comp_tasks.py

Lines changed: 13 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -6,6 +6,11 @@
66
import sqlalchemy as sa
77
from sqlalchemy.dialects import postgresql
88

9+
from ._common import (
10+
column_created_datetime,
11+
column_modified_datetime,
12+
register_modified_datetime_auto_update_trigger,
13+
)
914
from .base import metadata
1015
from .comp_pipeline import StateType
1116

@@ -75,9 +80,17 @@ class NodeClass(enum.Enum):
7580
sa.Column("submit", sa.DateTime, doc="UTC timestamp for task submission"),
7681
sa.Column("start", sa.DateTime, doc="UTC timestamp when task started"),
7782
sa.Column("end", sa.DateTime, doc="UTC timestamp for task completion"),
83+
sa.Column(
84+
"last_heartbeat",
85+
sa.DateTime(timezone=True),
86+
doc="UTC timestamp for last task running check",
87+
),
88+
column_created_datetime(timezone=True),
89+
column_modified_datetime(timezone=True),
7890
sa.UniqueConstraint("project_id", "node_id", name="project_node_uniqueness"),
7991
)
8092

93+
register_modified_datetime_auto_update_trigger(comp_tasks)
8194

8295
DB_PROCEDURE_NAME: str = "notify_comp_tasks_changed"
8396
DB_TRIGGER_NAME: str = f"{DB_PROCEDURE_NAME}_event"

packages/postgres-database/tests/docker-compose.prod.yml

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -6,3 +6,4 @@ services:
66
volumes:
77
postgres_data:
88
name: ${POSTGRES_DATA_VOLUME}
9+
external: true

packages/postgres-database/tests/test_comp_tasks.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -106,7 +106,7 @@ async def test_listen_query(
106106
db_connection, task, outputs=updated_output, state=StateType.ABORTED
107107
)
108108
tasks = await _assert_notification_queue_status(db_notification_queue, 1)
109-
assert tasks[0]["changes"] == ["outputs", "state"]
109+
assert tasks[0]["changes"] == ["modified", "outputs", "state"]
110110
assert (
111111
tasks[0]["data"]["outputs"] == updated_output
112112
), f"the data received from the database is {tasks[0]}, expected new output is {updated_output}"
@@ -116,7 +116,7 @@ async def test_listen_query(
116116
await _update_comp_task_with(db_connection, task, outputs=updated_output)
117117
await _update_comp_task_with(db_connection, task, outputs=updated_output)
118118
tasks = await _assert_notification_queue_status(db_notification_queue, 1)
119-
assert tasks[0]["changes"] == ["outputs"]
119+
assert tasks[0]["changes"] == ["modified", "outputs"]
120120
assert (
121121
tasks[0]["data"]["outputs"] == updated_output
122122
), f"the data received from the database is {tasks[0]}, expected new output is {updated_output}"
@@ -132,7 +132,7 @@ async def test_listen_query(
132132
tasks = await _assert_notification_queue_status(db_notification_queue, NUM_CALLS)
133133

134134
for n, output in enumerate(update_outputs):
135-
assert tasks[n]["changes"] == ["outputs"]
135+
assert tasks[n]["changes"] == ["modified", "outputs"]
136136
assert (
137137
tasks[n]["data"]["outputs"] == output
138138
), f"the data received from the database is {tasks[n]}, expected new output is {output}"

packages/service-library/src/servicelib/rabbitmq.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -181,6 +181,8 @@ async def subscribe(
181181
182182
Raises:
183183
aio_pika.exceptions.ChannelPreconditionFailed: In case an existing exchange with different type is used
184+
Returns:
185+
queue name
184186
"""
185187

186188
assert self._channel_pool # nosec

packages/service-library/tests/rabbitmq/test_rabbitmq.py

Lines changed: 6 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -99,8 +99,9 @@ async def _assert_message_received(
9999
reraise=True,
100100
):
101101
with attempt:
102-
# NOTE: this sleep is here to ensure that there are not multiple messages coming in
103-
await asyncio.sleep(1)
102+
print(
103+
f"--> waiting for rabbitmq message [{attempt.retry_state.attempt_number}, {attempt.retry_state.idle_for}]"
104+
)
104105
assert mocked_message_parser.call_count == expected_call_count
105106
if expected_call_count == 1:
106107
assert expected_message
@@ -112,6 +113,9 @@ async def _assert_message_received(
112113
else:
113114
assert expected_message
114115
mocked_message_parser.assert_any_call(expected_message.message.encode())
116+
print(
117+
f"<-- rabbitmq message received after [{attempt.retry_state.attempt_number}, {attempt.retry_state.idle_for}]"
118+
)
115119

116120

117121
async def test_rabbit_client_pub_sub_message_is_lost_if_no_consumer_present(

0 commit comments

Comments (0)