
Commit c20bf3e

✨ (⚠️ devops) 🗃️ Is922 resource tracking/1. version of regular scraping (#4380)
1 parent b537b67 commit c20bf3e


41 files changed (+1578, -277 lines)
@@ -0,0 +1,57 @@
"""adding resource tracker container table

Revision ID: 6e91067932f2
Revises: 52cf00912ad9
Create Date: 2023-06-21 14:12:40.292816+00:00

"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "6e91067932f2"
down_revision = "52cf00912ad9"
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "resource_tracker_container",
        sa.Column("container_id", sa.String(), nullable=False),
        sa.Column("image", sa.String(), nullable=False),
        sa.Column("user_id", sa.BigInteger(), nullable=False),
        sa.Column("product_name", sa.String(), nullable=False),
        sa.Column(
            "service_settings_reservation_nano_cpus", sa.BigInteger(), nullable=True
        ),
        sa.Column(
            "service_settings_reservation_memory_bytes", sa.BigInteger(), nullable=True
        ),
        sa.Column(
            "service_settings_reservation_additional_info",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
        ),
        sa.Column("container_cpu_usage_seconds_total", sa.Float(), nullable=False),
        sa.Column("prometheus_created", sa.DateTime(timezone=True), nullable=False),
        sa.Column(
            "prometheus_last_scraped", sa.DateTime(timezone=True), nullable=False
        ),
        sa.Column(
            "modified",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
    )
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("resource_tracker_container")
    # ### end Alembic commands ###
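
For illustration only (not part of this commit): a minimal sketch of applying and rolling back this revision through Alembic's Python API, assuming an alembic.ini that points at the simcore_postgres_database migrations.

# Hedged sketch: apply / roll back the 6e91067932f2 revision programmatically.
# The "alembic.ini" location is an assumption; the revision ids come from the file above.
from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # assumed path to the Alembic configuration

command.upgrade(alembic_cfg, "head")  # creates resource_tracker_container
command.downgrade(alembic_cfg, "-1")  # steps back one revision, dropping the table again
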
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
""" resource_tracker_container table

- Table where we store the resource usage of each container that
    we scrape via resource-usage-tracker service
"""

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB

from ._common import column_modified_datetime
from .base import metadata

resource_tracker_container = sa.Table(
    "resource_tracker_container",
    metadata,
    sa.Column(
        "container_id",
        sa.String,
        nullable=False,
        doc="Refers to container id scraped via Prometheus",
    ),
    sa.Column(
        "image",
        sa.String,
        nullable=False,
        doc="image label scraped via Prometheus (taken from container labels), ex. registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
    ),
    sa.Column(
        "user_id",
        sa.BigInteger,
        nullable=False,
        doc="user_id label scraped via Prometheus (taken from container labels)",
    ),
    sa.Column(
        "product_name",
        sa.String,
        nullable=False,
        doc="product_name label scraped via Prometheus (taken from container labels)",
    ),
    sa.Column(
        "service_settings_reservation_nano_cpus",
        sa.BigInteger,
        nullable=True,
        doc="CPU resource allocated to a container, ex. 500000000 means that the container is allocated 0.5 CPU shares",
    ),
    sa.Column(
        "service_settings_reservation_memory_bytes",
        sa.BigInteger,
        nullable=True,
        doc="memory limit in bytes scraped via Prometheus",
    ),
    sa.Column(
        "service_settings_reservation_additional_info",
        JSONB,
        nullable=False,
        doc="storing additional information about the reservation settings",
    ),
    sa.Column("container_cpu_usage_seconds_total", sa.Float, nullable=False),
    sa.Column(
        "prometheus_created",
        sa.DateTime(timezone=True),
        nullable=False,
        doc="First container creation timestamp (UTC timestamp)",
    ),
    sa.Column(
        "prometheus_last_scraped",
        sa.DateTime(timezone=True),
        nullable=False,
        doc="Last prometheus scraped timestamp (UTC timestamp)",
    ),
    column_modified_datetime(timezone=True),
    # ---------------------------
    sa.PrimaryKeyConstraint("container_id", name="resource_tracker_container_pkey"),
)
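
For illustration only (not part of this commit): a sketch of how the resource-usage-tracker could upsert one scraped sample into this table, keyed on the container_id primary key. The model's import path and the sample values are assumptions.

# Hedged sketch: upsert one scraped container sample into resource_tracker_container.
from datetime import datetime, timezone

from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncEngine

# NOTE: assumed import path for the table defined above
from simcore_postgres_database.models.resource_tracker import resource_tracker_container


async def upsert_container_sample(engine: AsyncEngine, sample: dict) -> None:
    stmt = (
        pg_insert(resource_tracker_container)
        .values(**sample)
        .on_conflict_do_update(
            index_elements=[resource_tracker_container.c.container_id],
            set_={
                # on a re-scrape, refresh only the counter and the last-scraped timestamp
                "container_cpu_usage_seconds_total": sample["container_cpu_usage_seconds_total"],
                "prometheus_last_scraped": sample["prometheus_last_scraped"],
            },
        )
    )
    async with engine.begin() as conn:
        await conn.execute(stmt)


# Example payload (illustrative values), to be passed together with an AsyncEngine:
sample = {
    "container_id": "abc123",
    "image": "registry.osparc.io/simcore/services/dynamic/jupyter-smash:3.0.9",
    "user_id": 43,
    "product_name": "osparc",
    "service_settings_reservation_nano_cpus": 500_000_000,
    "service_settings_reservation_memory_bytes": 1024**3,
    "service_settings_reservation_additional_info": {},
    "container_cpu_usage_seconds_total": 12.5,
    "prometheus_created": datetime.now(timezone.utc),
    "prometheus_last_scraped": datetime.now(timezone.utc),
}
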
Lines changed: 58 additions & 0 deletions
@@ -0,0 +1,58 @@
import logging

from fastapi import FastAPI
from settings_library.postgres import PostgresSettings
from simcore_postgres_database.utils_aiosqlalchemy import (
    get_pg_engine_stateinfo,
    raise_if_migration_not_ready,
)
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
from tenacity import retry

from .retry_policies import PostgresRetryPolicyUponInitialization

logger = logging.getLogger(__name__)


@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
async def connect_to_db(app: FastAPI, cfg: PostgresSettings) -> None:
    logger.debug("Connecting db ...")

    engine: AsyncEngine = create_async_engine(
        cfg.dsn_with_async_sqlalchemy,
        pool_size=cfg.POSTGRES_MINSIZE,
        max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
        connect_args={
            "server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
        },
        pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
        future=True,  # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
    )

    logger.debug("Connected to %s", engine.url)  # pylint: disable=no-member

    logger.debug("Checking db migration...")
    try:
        await raise_if_migration_not_ready(engine)
    except Exception:
        # NOTE: engine must be closed because retry will create a new engine
        await engine.dispose()
        raise

    logger.debug("Migration up-to-date")

    app.state.engine = engine

    logger.debug(
        "Setup engine: %s",
        await get_pg_engine_stateinfo(engine),
    )


async def close_db_connection(app: FastAPI) -> None:
    logger.debug("Disconnecting db ...")

    if engine := app.state.engine:
        await engine.dispose()

    logger.debug("Disconnected from %s", engine.url)  # pylint: disable=no-member

services/catalog/src/simcore_service_catalog/core/events.py

Lines changed: 3 additions & 2 deletions
@@ -3,8 +3,9 @@
 
 from fastapi import FastAPI
 from models_library.basic_types import BootModeEnum
+from servicelib.db_async_engine import close_db_connection, connect_to_db
 
-from ..db.events import close_db_connection, connect_to_db, setup_default_product
+from ..db.events import setup_default_product
 from ..services.director import close_director, setup_director
 from ..services.remote_debug import setup_remote_debugging
 from .background_tasks import start_registry_sync_task, stop_registry_sync_task
@@ -23,7 +24,7 @@ async def start_app() -> None:
 
     # setup connection to pg db
     if app.state.settings.CATALOG_POSTGRES:
-        await connect_to_db(app)
+        await connect_to_db(app, app.state.settings.CATALOG_POSTGRES)
         await setup_default_product(app)
 
     if app.state.settings.CATALOG_DIRECTOR:
Lines changed: 0 additions & 53 deletions
@@ -1,65 +1,12 @@
 import logging
 
 from fastapi import FastAPI
-from servicelib.retry_policies import PostgresRetryPolicyUponInitialization
-from settings_library.postgres import PostgresSettings
-from simcore_postgres_database.utils_aiosqlalchemy import (
-    get_pg_engine_stateinfo,
-    raise_if_migration_not_ready,
-)
-from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
-from tenacity import retry
 
 from .repositories.products import ProductsRepository
 
 logger = logging.getLogger(__name__)
 
 
-@retry(**PostgresRetryPolicyUponInitialization(logger).kwargs)
-async def connect_to_db(app: FastAPI) -> None:
-    logger.debug("Connecting db ...")
-    cfg: PostgresSettings = app.state.settings.CATALOG_POSTGRES
-
-    engine: AsyncEngine = create_async_engine(
-        cfg.dsn_with_async_sqlalchemy,
-        pool_size=cfg.POSTGRES_MINSIZE,
-        max_overflow=cfg.POSTGRES_MAXSIZE - cfg.POSTGRES_MINSIZE,
-        connect_args={
-            "server_settings": {"application_name": cfg.POSTGRES_CLIENT_NAME}
-        },
-        pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
-        future=True,  # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
-    )
-
-    logger.debug("Connected to %s", engine.url)  # pylint: disable=no-member
-
-    logger.debug("Checking db migration...")
-    try:
-        await raise_if_migration_not_ready(engine)
-    except Exception:
-        # NOTE: engine must be closed because retry will create a new engine
-        await engine.dispose()
-        raise
-
-    logger.debug("Migration up-to-date")
-
-    app.state.engine = engine
-
-    logger.debug(
-        "Setup engine: %s",
-        await get_pg_engine_stateinfo(engine),
-    )
-
-
-async def close_db_connection(app: FastAPI) -> None:
-    logger.debug("Disconnecting db ...")
-
-    if engine := app.state.engine:
-        await engine.dispose()
-
-    logger.debug("Disconnected from %s", engine.url)  # pylint: disable=no-member
-
-
 async def setup_default_product(app: FastAPI):
     repo = ProductsRepository(db_engine=app.state.engine)
     app.state.default_product_name = await repo.get_default_product_name()

services/docker-compose.devel.yml

Lines changed: 10 additions & 0 deletions
@@ -124,6 +124,16 @@ services:
       - SC_BOOT_MODE=debug-ptvsd
       - RESOURCE_USAGE_TRACKER_LOGLEVEL=DEBUG
       - DEBUG=true
+      - LOG_FORMAT_LOCAL_DEV_ENABLED=${LOG_FORMAT_LOCAL_DEV_ENABLED}
+      - POSTGRES_DB=${POSTGRES_DB}
+      - POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT}
+      - POSTGRES_HOST=${POSTGRES_HOST}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - POSTGRES_PORT=${POSTGRES_PORT}
+      - POSTGRES_USER=${POSTGRES_USER}
+      - PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
+      - PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
+      - PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
     volumes:
       - ./resource-usage-tracker:/devel/services/resource-usage-tracker
       - ../packages:/devel/packages
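
For illustration only (not part of this commit): the environment variables injected above could be consumed inside the resource-usage-tracker roughly as sketched below, assuming pydantic v1-style BaseSettings; the PrometheusScrapeSettings class name is hypothetical, while PostgresSettings is the repo's own settings_library class.

# Hedged sketch: reading the POSTGRES_* and PROMETHEUS_* variables passed in above.
from pydantic import BaseSettings, SecretStr

from settings_library.postgres import PostgresSettings


class PrometheusScrapeSettings(BaseSettings):  # hypothetical name
    PROMETHEUS_URL: str
    PROMETHEUS_USERNAME: str
    PROMETHEUS_PASSWORD: SecretStr


prometheus_settings = PrometheusScrapeSettings()  # reads PROMETHEUS_* from the environment
postgres_settings = PostgresSettings()  # reads POSTGRES_* from the environment
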

services/docker-compose.yml

Lines changed: 3 additions & 3 deletions
@@ -194,9 +194,9 @@ services:
       - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
       - POSTGRES_PORT=${POSTGRES_PORT}
       - POSTGRES_USER=${POSTGRES_USER}
-      - PROMETHEUS_URL=${PROMETHEUS_URL}
-      - PROMETHEUS_USERNAME=${PROMETHEUS_USERNAME}
-      - PROMETHEUS_PASSWORD=${PROMETHEUS_PASSWORD}
+      - PROMETHEUS_URL=${RESOURCE_USAGE_TRACKER_PROMETHEUS_URL}
+      - PROMETHEUS_USERNAME=${RESOURCE_USAGE_TRACKER_PROMETHEUS_USERNAME}
+      - PROMETHEUS_PASSWORD=${RESOURCE_USAGE_TRACKER_PROMETHEUS_PASSWORD}
       - RESOURCE_USAGE_TRACKER_LOGLEVEL=${LOG_LEVEL:-INFO}
 
   static-webserver:
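
For illustration only (not part of this commit): with PROMETHEUS_URL/USERNAME/PASSWORD now sourced from the RESOURCE_USAGE_TRACKER_PROMETHEUS_* variables, a single "regular scraping" pass against the Prometheus HTTP API could look roughly like this; the function name and metric query are illustrative.

# Hedged sketch: one scrape of container CPU counters from the Prometheus HTTP API.
import os

import httpx


def scrape_container_cpu_seconds() -> list[dict]:
    response = httpx.get(
        f"{os.environ['PROMETHEUS_URL']}/api/v1/query",
        params={"query": "container_cpu_usage_seconds_total"},
        auth=(os.environ["PROMETHEUS_USERNAME"], os.environ["PROMETHEUS_PASSWORD"]),
        timeout=10,
    )
    response.raise_for_status()
    # Prometheus instant-query responses carry the samples under data.result
    return response.json()["data"]["result"]
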
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
RESOURCE_USAGE_TRACKER_DEV_FEATURES_ENABLED=1

LOG_LEVEL=DEBUG

POSTGRES_USER=test
POSTGRES_PASSWORD=test
POSTGRES_DB=test
POSTGRES_HOST=localhost

services/resource-usage-tracker/requirements/_base.in

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@
 # intra-repo required dependencies
 --requirement ../../../packages/models-library/requirements/_base.in
 --requirement ../../../packages/settings-library/requirements/_base.in
+--requirement ../../../packages/postgres-database/requirements/_base.in
 # service-library[fastapi]
 --requirement ../../../packages/service-library/requirements/_base.in
 --requirement ../../../packages/service-library/requirements/_fastapi.in
