Skip to content

Commit 86df645

Browse files
🐛 🗃️ fix resource tracking not starting + adding project uuid to tracking table (#4416)
1 parent c127b87 commit 86df645

File tree

7 files changed

+52
-12
lines changed

7 files changed

+52
-12
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
"""adding project_uuid to resource_tracker_container table
2+
3+
Revision ID: 417f9eb848ce
4+
Revises: add0afaaf728
5+
Create Date: 2023-06-23 14:15:20.721005+00:00
6+
7+
"""
8+
import sqlalchemy as sa
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision = "417f9eb848ce"
13+
down_revision = "add0afaaf728"
14+
branch_labels = None
15+
depends_on = None
16+
17+
18+
def upgrade():
    """Add the mandatory ``project_uuid`` column to ``resource_tracker_container``.

    A plain ``ADD COLUMN ... NOT NULL`` without a default is rejected by
    PostgreSQL as soon as the table holds at least one row, because the
    existing rows would end up with NULL in the new column. To keep the
    migration applicable to populated databases, the column is created with a
    temporary server default (which back-fills existing rows) and the default
    is removed right afterwards, so the resulting schema is a ``NOT NULL``
    column without default.
    """
    # NOTE(review): the nil UUID is only a placeholder for rows created before
    # this migration -- confirm that this is an acceptable back-fill value.
    placeholder_project_uuid = "00000000-0000-0000-0000-000000000000"

    op.add_column(
        "resource_tracker_container",
        sa.Column(
            "project_uuid",
            sa.String(),
            nullable=False,
            server_default=placeholder_project_uuid,
        ),
    )
    # Drop the temporary default: new rows must always provide project_uuid.
    op.alter_column(
        "resource_tracker_container",
        "project_uuid",
        server_default=None,
    )
25+
26+
27+
def downgrade():
    """Revert the migration by dropping ``project_uuid`` from ``resource_tracker_container``."""
    op.drop_column(
        table_name="resource_tracker_container",
        column_name="project_uuid",
    )

packages/postgres-database/src/simcore_postgres_database/models/resource_tracker.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,12 @@
3131
nullable=False,
3232
doc="user_id label scraped via Prometheus (taken from container labels)",
3333
),
34+
sa.Column(
35+
"project_uuid",
36+
sa.String,
37+
nullable=False,
38+
doc="project_uuid label scraped via Prometheus (taken from container labels)",
39+
),
3440
sa.Column(
3541
"product_name",
3642
sa.String,

services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/models/resource_tracker_container.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from arrow import Arrow
44
from models_library.products import ProductName
5+
from models_library.projects import ProjectID
56
from models_library.users import UserID
67
from pydantic import BaseModel
78

@@ -11,6 +12,7 @@ class ContainerResourceUsageMetric(BaseModel):
1112
image: str
1213
user_id: UserID
1314
product_name: ProductName
15+
project_uuid: ProjectID
1416
service_settings_reservation_nano_cpus: int | None
1517
service_settings_reservation_memory_bytes: int | None
1618
service_settings_reservation_additional_info: dict[str, Any] = {}

services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/repositories/resource_tracker.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ async def upsert_resource_tracker_container_data_(
1919
container_id=data.container_id,
2020
image=data.image,
2121
user_id=data.user_id,
22+
project_uuid=str(data.project_uuid),
2223
product_name=data.product_name,
2324
service_settings_reservation_nano_cpus=data.service_settings_reservation_nano_cpus,
2425
service_settings_reservation_memory_bytes=data.service_settings_reservation_memory_bytes,

services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/resource_tracker_core.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,11 @@
1616
_logger = logging.getLogger(__name__)
1717

1818

19-
async def _prometheus_client_custom_query(
19+
def _prometheus_sync_client_custom_query(
2020
prometheus_client: PrometheusConnect, promql_cpu_query: str
2121
) -> list[dict]:
2222
_logger.info("Querying prometheus with: %s", promql_cpu_query)
23-
data: list[dict] = await asyncio.get_event_loop().run_in_executor(
24-
None, prometheus_client.custom_query(promql_cpu_query)
25-
)
23+
data: list[dict] = prometheus_client.custom_query(promql_cpu_query)
2624
return data
2725

2826

@@ -33,8 +31,10 @@ async def _scrape_and_upload_container_resource_usage(
3331
) -> None:
3432
# Query CPU seconds
3533
promql_cpu_query = f"sum without (cpu) (container_cpu_usage_seconds_total{{image=~'{image_regex}'}})[30m:1m]"
36-
containers_cpu_seconds_usage: list = await _prometheus_client_custom_query(
37-
prometheus_client, promql_cpu_query
34+
containers_cpu_seconds_usage: list[
35+
dict
36+
] = await asyncio.get_event_loop().run_in_executor(
37+
None, _prometheus_sync_client_custom_query, prometheus_client, promql_cpu_query
3838
)
3939
_logger.info(
4040
"Received %s containers from Prometheus", len(containers_cpu_seconds_usage)
@@ -74,6 +74,7 @@ async def _scrape_and_upload_container_resource_usage(
7474
image=metric["image"],
7575
user_id=metric["container_label_user_id"],
7676
product_name=metric["container_label_product_name"],
77+
project_uuid=metric["container_label_study_id"],
7778
service_settings_reservation_nano_cpus=int(nano_cpus)
7879
if nano_cpus
7980
else None,

services/resource-usage-tracker/tests/unit/with_dbs/test_collect_container_resource_usage_task.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ def mocked_prometheus_client_custom_query(
3434
data = json.load(file)
3535

3636
mocked_get_prometheus_api_client = mocker.patch(
37-
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_client_custom_query",
37+
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_sync_client_custom_query",
3838
autospec=True,
3939
return_value=data,
4040
)

services/resource-usage-tracker/tests/unit/with_dbs/test_collect_container_resource_usage_task__on_update_set.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ def random_promql_output_generator():
7777
"container_label_product_name": "osparc",
7878
"container_label_simcore_service_settings": '[{"name": "ports", "type": "int", "value": 8888}, {"name": "env", "type": "string", "value": ["DISPLAY=:0"]}, {"name": "env", "type": "string", "value": ["SYM_SERVER_HOSTNAME=sym-server_%service_uuid%"]}, {"name": "mount", "type": "object", "value": [{"ReadOnly": true, "Source": "/tmp/.X11-unix", "Target": "/tmp/.X11-unix", "Type": "bind"}]}, {"name": "constraints", "type": "string", "value": ["node.platform.os == linux"]}, {"name": "Resources", "type": "Resources", "value": {"Limits": {"NanoCPUs": 4000000000, "MemoryBytes": 17179869184}, "Reservations": {"NanoCPUs": 100000000, "MemoryBytes": 536870912, "GenericResources": [{"DiscreteResourceSpec": {"Kind": "VRAM", "Value": 1}}]}}}]',
7979
"container_label_simcore_user_agent": "puppeteer",
80-
"container_label_study_id": "52d7e1a8-0c27-11ee-bec2-024201234c7",
80+
"container_label_study_id": "46449cc3-7d83-4081-a44e-fc75a0c85f2c",
8181
"container_label_user_id": "43820",
8282
"container_label_uuid": "2b231c38-0ebc-5cc0-1234-1ffe573f54e9",
8383
"id": "/docker/58e1138d51eb5eafd737024d0df0b01ef88f2087e5a3922565c59130d57ac7a3",
@@ -106,7 +106,7 @@ def mocked_prometheus_client_custom_query(
106106
mocker: MockerFixture, project_slug_dir: Path, random_promql_output_generator
107107
) -> dict[str, mock.Mock]:
108108
mocked_get_prometheus_api_client = mocker.patch(
109-
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_client_custom_query",
109+
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_sync_client_custom_query",
110110
autospec=True,
111111
return_value=random_promql_output_generator["data"],
112112
)
@@ -135,13 +135,13 @@ async def test_collect_container_resource_usage_task(
135135
assert len(db_rows) == 1
136136

137137
assert (
138-
random_promql_output_generator["max_float"] == db_rows[0][7]
138+
random_promql_output_generator["max_float"] == db_rows[0][8]
139139
) # <-- container_cpu_usage_seconds_total
140140
assert (
141141
arrow.get(random_promql_output_generator["min_timestamp"]).datetime
142-
== db_rows[0][8]
142+
== db_rows[0][9]
143143
) # <-- prometheus_created
144144
assert (
145145
arrow.get(random_promql_output_generator["max_timestamp"]).datetime
146-
== db_rows[0][9]
146+
== db_rows[0][10]
147147
) # <-- prometheus_last_scraped

0 commit comments

Comments
 (0)