Skip to content

πŸ› πŸ—ƒοΈ fix resource tracking not starting + adding project uuid to tracking table #4416

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""adding study id to resource-tracking-container table

Revision ID: 417f9eb848ce
Revises: add0afaaf728
Create Date: 2023-06-23 14:15:20.721005+00:00

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "417f9eb848ce"
down_revision = "add0afaaf728"
branch_labels = None
depends_on = None


def upgrade():
    """Add the mandatory ``project_uuid`` column to ``resource_tracker_container``.

    A NOT NULL column cannot be added to a table that already holds rows
    unless a default is supplied, so the column is created with a temporary
    server default that backfills existing rows. The default is removed
    right afterwards so the resulting schema is NOT NULL with no default.
    """
    # NOTE(review): the nil UUID is used as the placeholder for rows tracked
    # before this migration -- confirm this sentinel is acceptable downstream.
    op.add_column(
        "resource_tracker_container",
        sa.Column(
            "project_uuid",
            sa.String(),
            nullable=False,
            server_default="00000000-0000-0000-0000-000000000000",
        ),
    )
    # Drop the temporary default: new rows must always provide a project_uuid.
    op.alter_column(
        "resource_tracker_container",
        "project_uuid",
        server_default=None,
    )


def downgrade():
    """Revert the upgrade by dropping ``project_uuid`` from ``resource_tracker_container``."""
    op.drop_column(
        table_name="resource_tracker_container",
        column_name="project_uuid",
    )
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@
nullable=False,
doc="user_id label scraped via Prometheus (taken from container labels)",
),
sa.Column(
"project_uuid",
sa.String,
nullable=False,
doc="project_uuid label scraped via Prometheus (taken from container labels)",
),
sa.Column(
"product_name",
sa.String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from arrow import Arrow
from models_library.products import ProductName
from models_library.projects import ProjectID
from models_library.users import UserID
from pydantic import BaseModel

Expand All @@ -11,6 +12,7 @@ class ContainerResourceUsageMetric(BaseModel):
image: str
user_id: UserID
product_name: ProductName
project_uuid: ProjectID
service_settings_reservation_nano_cpus: int | None
service_settings_reservation_memory_bytes: int | None
service_settings_reservation_additional_info: dict[str, Any] = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ async def upsert_resource_tracker_container_data_(
container_id=data.container_id,
image=data.image,
user_id=data.user_id,
project_uuid=str(data.project_uuid),
product_name=data.product_name,
service_settings_reservation_nano_cpus=data.service_settings_reservation_nano_cpus,
service_settings_reservation_memory_bytes=data.service_settings_reservation_memory_bytes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
_logger = logging.getLogger(__name__)


async def _prometheus_client_custom_query(
def _prometheus_sync_client_custom_query(
prometheus_client: PrometheusConnect, promql_cpu_query: str
) -> list[dict]:
_logger.info("Querying prometheus with: %s", promql_cpu_query)
data: list[dict] = await asyncio.get_event_loop().run_in_executor(
None, prometheus_client.custom_query(promql_cpu_query)
)
data: list[dict] = prometheus_client.custom_query(promql_cpu_query)
return data


Expand All @@ -33,8 +31,10 @@ async def _scrape_and_upload_container_resource_usage(
) -> None:
# Query CPU seconds
promql_cpu_query = f"sum without (cpu) (container_cpu_usage_seconds_total{{image=~'{image_regex}'}})[30m:1m]"
containers_cpu_seconds_usage: list = await _prometheus_client_custom_query(
prometheus_client, promql_cpu_query
containers_cpu_seconds_usage: list[
dict
] = await asyncio.get_event_loop().run_in_executor(
None, _prometheus_sync_client_custom_query, prometheus_client, promql_cpu_query
)
_logger.info(
"Received %s containers from Prometheus", len(containers_cpu_seconds_usage)
Expand Down Expand Up @@ -74,6 +74,7 @@ async def _scrape_and_upload_container_resource_usage(
image=metric["image"],
user_id=metric["container_label_user_id"],
product_name=metric["container_label_product_name"],
project_uuid=metric["container_label_study_id"],
service_settings_reservation_nano_cpus=int(nano_cpus)
if nano_cpus
else None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def mocked_prometheus_client_custom_query(
data = json.load(file)

mocked_get_prometheus_api_client = mocker.patch(
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_client_custom_query",
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_sync_client_custom_query",
autospec=True,
return_value=data,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def random_promql_output_generator():
"container_label_product_name": "osparc",
"container_label_simcore_service_settings": '[{"name": "ports", "type": "int", "value": 8888}, {"name": "env", "type": "string", "value": ["DISPLAY=:0"]}, {"name": "env", "type": "string", "value": ["SYM_SERVER_HOSTNAME=sym-server_%service_uuid%"]}, {"name": "mount", "type": "object", "value": [{"ReadOnly": true, "Source": "/tmp/.X11-unix", "Target": "/tmp/.X11-unix", "Type": "bind"}]}, {"name": "constraints", "type": "string", "value": ["node.platform.os == linux"]}, {"name": "Resources", "type": "Resources", "value": {"Limits": {"NanoCPUs": 4000000000, "MemoryBytes": 17179869184}, "Reservations": {"NanoCPUs": 100000000, "MemoryBytes": 536870912, "GenericResources": [{"DiscreteResourceSpec": {"Kind": "VRAM", "Value": 1}}]}}}]',
"container_label_simcore_user_agent": "puppeteer",
"container_label_study_id": "52d7e1a8-0c27-11ee-bec2-024201234c7",
"container_label_study_id": "46449cc3-7d83-4081-a44e-fc75a0c85f2c",
"container_label_user_id": "43820",
"container_label_uuid": "2b231c38-0ebc-5cc0-1234-1ffe573f54e9",
"id": "/docker/58e1138d51eb5eafd737024d0df0b01ef88f2087e5a3922565c59130d57ac7a3",
Expand Down Expand Up @@ -106,7 +106,7 @@ def mocked_prometheus_client_custom_query(
mocker: MockerFixture, project_slug_dir: Path, random_promql_output_generator
) -> dict[str, mock.Mock]:
mocked_get_prometheus_api_client = mocker.patch(
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_client_custom_query",
"simcore_service_resource_usage_tracker.resource_tracker_core._prometheus_sync_client_custom_query",
autospec=True,
return_value=random_promql_output_generator["data"],
)
Expand Down Expand Up @@ -135,13 +135,13 @@ async def test_collect_container_resource_usage_task(
assert len(db_rows) == 1

assert (
random_promql_output_generator["max_float"] == db_rows[0][7]
random_promql_output_generator["max_float"] == db_rows[0][8]
) # <-- container_cpu_usage_seconds_total
assert (
arrow.get(random_promql_output_generator["min_timestamp"]).datetime
== db_rows[0][8]
== db_rows[0][9]
) # <-- prometheus_created
assert (
arrow.get(random_promql_output_generator["max_timestamp"]).datetime
== db_rows[0][9]
== db_rows[0][10]
) # <-- prometheus_last_scraped