Skip to content

Commit 50b5eb2

Browse files
GitHKAndrei Neagu
and
Andrei Neagu
authored
✨Replacing dynamic sidecar proxy with Caddy (#2597)
* from traefik to caddy proxy * removed traefik rules proxy configuration * update proxy config * extended API to return the name of entrypoint container * replaced traefik with caddy in dy-sidecar * fixed imports * moved to constant and clarified usage * fix pylint * fixed responses * fixed API path name * added missing response * updated endpoint * moved description to constants * made method private * changed API signature * update docstring * fix codeclimate * fixing resource allocation for the proxy * no longer needed * fixed http error types * fixed logging for each retry * using correct name in filter * adjusted proxy limits and restart policy * removed comment * updated openapi spec * added some typing * added proper regex validation for ids * added refactor note * changed interface for fetching container name * removed unused Co-authored-by: Andrei Neagu <[email protected]>
1 parent bf2f18d commit 50b5eb2

File tree

16 files changed

+505
-230
lines changed

16 files changed

+505
-230
lines changed

services/director-v2/src/simcore_service_director_v2/core/settings.py

+13-26
Original file line numberDiff line numberDiff line change
@@ -67,33 +67,12 @@ class CelerySettings(BaseCelerySettings):
6767
CELERY_PUBLICATION_TIMEOUT: int = 60
6868

6969

70-
class DynamicSidecarTraefikSettings(BaseCustomSettings):
71-
DYNAMIC_SIDECAR_TRAEFIK_VERSION: str = Field(
72-
"v2.4.13",
73-
description="current version of the Traefik image to be pulled and used from dockerhub",
74-
)
75-
DYNAMIC_SIDECAR_TRAEFIK_LOGLEVEL: str = Field(
76-
"warn", description="set Treafik's loglevel to be used"
77-
)
78-
79-
DYNAMIC_SIDECAR_TRAEFIK_ACCESS_LOG: bool = Field(
80-
False, description="enables or disables access log"
70+
class DynamicSidecarProxySettings(BaseCustomSettings):
71+
DYNAMIC_SIDECAR_CADDY_VERSION: str = Field(
72+
"2.4.5-alpine",
73+
description="current version of the Caddy image to be pulled and used from dockerhub",
8174
)
8275

83-
@validator("DYNAMIC_SIDECAR_TRAEFIK_LOGLEVEL", pre=True)
84-
@classmethod
85-
def validate_log_level(cls, v) -> str:
86-
if v not in SUPPORTED_TRAEFIK_LOG_LEVELS:
87-
message = (
88-
"Got log level '{v}', expected one of '{SUPPORTED_TRAEFIK_LOG_LEVELS}'"
89-
)
90-
raise ValueError(message)
91-
return v
92-
93-
@cached_property
94-
def access_log_as_string(self) -> str:
95-
return str(self.DYNAMIC_SIDECAR_TRAEFIK_ACCESS_LOG).lower()
96-
9776

9877
class DynamicSidecarSettings(BaseCustomSettings):
9978
SC_BOOT_MODE: BootModeEnum = Field(
@@ -147,6 +126,14 @@ class DynamicSidecarSettings(BaseCustomSettings):
147126
"twards the dynamic-sidecar, as is the case with the above timeout field."
148127
),
149128
)
129+
DYNAMIC_SIDECAR_WAIT_FOR_CONTAINERS_TO_START: PositiveFloat = Field(
130+
60.0 * 60.0,
131+
description=(
132+
"After running `docker-compose up`, images might need to be pulled "
133+
"before everything is started. Using default 1hour timeout to let this "
134+
"operation finish."
135+
),
136+
)
150137

151138
TRAEFIK_SIMCORE_ZONE: str = Field(
152139
...,
@@ -158,7 +145,7 @@ class DynamicSidecarSettings(BaseCustomSettings):
158145
description="in case there are several deployments on the same docker swarm, it is attached as a label on all spawned services",
159146
)
160147

161-
DYNAMIC_SIDECAR_TRAEFIK_SETTINGS: DynamicSidecarTraefikSettings
148+
DYNAMIC_SIDECAR_PROXY_SETTINGS: DynamicSidecarProxySettings
162149

163150
REGISTRY: RegistrySettings
164151

services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
PathMappingsLabel,
1414
SimcoreServiceLabels,
1515
)
16-
from pydantic import BaseModel, Extra, Field, PositiveInt, PrivateAttr
16+
from pydantic import BaseModel, Extra, Field, PositiveInt, PrivateAttr, constr
1717

1818
from ..constants import (
1919
DYNAMIC_PROXY_SERVICE_PREFIX,
@@ -27,6 +27,9 @@
2727

2828
MAX_ALLOWED_SERVICE_NAME_LENGTH: int = 63
2929

30+
SHA256 = constr(max_length=64, regex=r"\b[A-Fa-f0-9]{64}\b")
31+
ServiceId = SHA256
32+
NetworkId = SHA256
3033

3134
logger = logging.getLogger()
3235

@@ -141,6 +144,7 @@ def compose_spec_submitted(self) -> bool:
141144
scription="docker inspect results from all the container ran at regular intervals",
142145
)
143146

147+
was_dynamic_sidecar_started: bool = False
144148
were_services_created: bool = Field(
145149
False,
146150
description=(
@@ -149,6 +153,21 @@ def compose_spec_submitted(self) -> bool:
149153
),
150154
)
151155

156+
# below had already been validated and
157+
# used only to start the proxy
158+
dynamic_sidecar_id: Optional[ServiceId] = Field(
159+
None, description="returned by the docker engine; used for starting the proxy"
160+
)
161+
dynamic_sidecar_network_id: Optional[NetworkId] = Field(
162+
None, description="returned by the docker engine; used for starting the proxy"
163+
)
164+
swarm_network_id: Optional[NetworkId] = Field(
165+
None, description="returned by the docker engine; used for starting the proxy"
166+
)
167+
swarm_network_name: Optional[str] = Field(
168+
None, description="used for starting the proxy"
169+
)
170+
152171
@property
153172
def can_save_state(self) -> bool:
154173
"""

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py

+41-8
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1+
import json
12
import logging
23
import traceback
34
from typing import Any, Dict
45

56
import httpx
67
from fastapi import FastAPI
8+
from starlette import status
79

810
from ...core.settings import DynamicSidecarSettings
911
from ...models.schemas.dynamic_services import SchedulerData
10-
from .errors import DynamicSchedulerException, DynamicSidecarNetworkError
12+
from .errors import (
13+
DynamicSchedulerException,
14+
DynamicSidecarNetworkError,
15+
EntrypointContainerNotFoundError,
16+
)
1117

1218
logger = logging.getLogger(__name__)
1319

@@ -41,6 +47,8 @@ class DynamicSidecarClient:
4147
# The previous implementation (with a shared client) raised
4248
# RuntimeErrors because resources were already locked.
4349

50+
API_VERSION = "v1"
51+
4452
def __init__(self, app: FastAPI):
4553
dynamic_sidecar_settings: DynamicSidecarSettings = (
4654
app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
@@ -73,11 +81,11 @@ async def containers_inspect(self, dynamic_sidecar_endpoint: str) -> Dict[str, A
7381
returns dict containing docker inspect result form
7482
all dynamic-sidecar started containers
7583
"""
76-
url = get_url(dynamic_sidecar_endpoint, "/v1/containers")
84+
url = get_url(dynamic_sidecar_endpoint, f"/{self.API_VERSION}/containers")
7785
try:
7886
async with httpx.AsyncClient(timeout=self._base_timeout) as client:
7987
response = await client.get(url=url)
80-
if response.status_code != 200:
88+
if response.status_code != status.HTTP_200_OK:
8189
message = (
8290
f"error during request status={response.status_code}, "
8391
f"body={response.text}"
@@ -93,11 +101,11 @@ async def containers_inspect(self, dynamic_sidecar_endpoint: str) -> Dict[str, A
93101
async def containers_docker_status(
94102
self, dynamic_sidecar_endpoint: str
95103
) -> Dict[str, Dict[str, str]]:
96-
url = get_url(dynamic_sidecar_endpoint, "/v1/containers")
104+
url = get_url(dynamic_sidecar_endpoint, f"/{self.API_VERSION}/containers")
97105
try:
98106
async with httpx.AsyncClient(timeout=self._base_timeout) as client:
99107
response = await client.get(url=url, params=dict(only_status=True))
100-
if response.status_code != 200:
108+
if response.status_code != status.HTTP_200_OK:
101109
logging.warning(
102110
"error during request status=%s, body=%s",
103111
response.status_code,
@@ -114,7 +122,7 @@ async def start_service_creation(
114122
self, dynamic_sidecar_endpoint: str, compose_spec: str
115123
) -> None:
116124
"""returns: True if the compose up was submitted correctly"""
117-
url = get_url(dynamic_sidecar_endpoint, "/v1/containers")
125+
url = get_url(dynamic_sidecar_endpoint, f"/{self.API_VERSION}/containers")
118126
try:
119127
async with httpx.AsyncClient(timeout=self._base_timeout) as client:
120128
response = await client.post(url, data=compose_spec)
@@ -134,11 +142,11 @@ async def start_service_creation(
134142

135143
async def begin_service_destruction(self, dynamic_sidecar_endpoint: str) -> None:
136144
"""runs docker compose down on the started spec"""
137-
url = get_url(dynamic_sidecar_endpoint, "/v1/containers:down")
145+
url = get_url(dynamic_sidecar_endpoint, f"/{self.API_VERSION}/containers:down")
138146
try:
139147
async with httpx.AsyncClient(timeout=self._base_timeout) as client:
140148
response = await client.post(url)
141-
if response.status_code != 200:
149+
if response.status_code != status.HTTP_200_OK:
142150
message = (
143151
f"ERROR during service destruction request: "
144152
f"status={response.status_code}, body={response.text}"
@@ -151,6 +159,31 @@ async def begin_service_destruction(self, dynamic_sidecar_endpoint: str) -> None
151159
log_httpx_http_error(url, "POST", traceback.format_exc())
152160
raise e
153161

162+
async def get_entrypoint_container_name(
163+
self, dynamic_sidecar_endpoint: str, dynamic_sidecar_network_name: str
164+
) -> str:
165+
"""
166+
While this API raises EntrypointContainerNotFoundError
167+
it should be called again, because in the meanwhile the containers
168+
might still be starting.
169+
"""
170+
filters = json.dumps({"network": dynamic_sidecar_network_name})
171+
url = get_url(
172+
dynamic_sidecar_endpoint,
173+
f"/{self.API_VERSION}/containers/name?filters={filters}",
174+
)
175+
try:
176+
async with httpx.AsyncClient(timeout=self._base_timeout) as client:
177+
response = await client.get(url=url)
178+
if response.status_code == status.HTTP_404_NOT_FOUND:
179+
raise EntrypointContainerNotFoundError()
180+
response.raise_for_status()
181+
182+
return response.json()
183+
except httpx.HTTPError:
184+
log_httpx_http_error(url, "GET", traceback.format_exc())
185+
raise
186+
154187

155188
async def setup_api_client(app: FastAPI) -> None:
156189
logger.debug("dynamic-sidecar api client setup")

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def _make_pending() -> Tuple[ServiceState, str]:
250250
return service_state, message
251251

252252

253-
async def are_services_missing(
253+
async def is_dynamic_sidecar_missing(
254254
node_uuid: NodeID, dynamic_sidecar_settings: DynamicSidecarSettings
255255
) -> bool:
256256
"""Used to check if the service should be created"""

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py

+7-33
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33

44
import yaml
55
from fastapi.applications import FastAPI
6-
from models_library.service_settings_labels import ComposeSpecLabel, PathMappingsLabel
7-
from pydantic import PositiveInt
6+
from models_library.service_settings_labels import ComposeSpecLabel
87

98
from ...core.settings import DynamicSidecarSettings
109
from .docker_service_specs import MATCH_SERVICE_VERSION, MATCH_SIMCORE_REGISTRY
@@ -16,14 +15,15 @@
1615
}
1716

1817

19-
def _inject_traefik_configuration(
18+
def _inject_proxy_network_configuration(
2019
service_spec: Dict[str, Any],
2120
target_container: str,
2221
dynamic_sidecar_network_name: str,
23-
simcore_traefik_zone: str,
24-
service_port: PositiveInt,
2522
) -> None:
26-
"""Injects configuration to allow the service to be accessible on the uuid.services.SERVICE_DNS"""
23+
"""
24+
Injects network configuration to allow the service
25+
to be accessible on `uuid.services.SERVICE_DNS`
26+
"""
2727

2828
# add external network to existing networks defined in the container
2929
service_spec["networks"] = {
@@ -41,21 +41,6 @@ def _inject_traefik_configuration(
4141
container_networks.append(dynamic_sidecar_network_name)
4242
target_container_spec["networks"] = container_networks
4343

44-
# expose spawned container to the internet
45-
labels = target_container_spec.get("labels", [])
46-
labels.extend(
47-
[
48-
f"io.simcore.zone={simcore_traefik_zone}",
49-
"traefik.enable=true",
50-
f"traefik.http.services.{target_container}.loadbalancer.server.port={service_port}",
51-
f"traefik.http.routers.{target_container}.entrypoints=http",
52-
f"traefik.http.routers.{target_container}.rule=PathPrefix(`/`)",
53-
]
54-
)
55-
56-
# put back updated labels
57-
target_container_spec["labels"] = labels
58-
5944

6045
def _assemble_from_service_key_and_tag(
6146
resolved_registry_url: str,
@@ -83,16 +68,12 @@ def _replace_env_vars_in_compose_spec(
8368

8469

8570
async def assemble_spec(
86-
# pylint: disable=too-many-arguments
8771
app: FastAPI,
8872
service_key: str,
8973
service_tag: str,
90-
paths_mapping: PathMappingsLabel, # pylint: disable=unused-argument
9174
compose_spec: ComposeSpecLabel,
9275
container_http_entry: Optional[str],
9376
dynamic_sidecar_network_name: str,
94-
simcore_traefik_zone: str,
95-
service_port: PositiveInt,
9677
) -> str:
9778
"""
9879
returns a docker-compose spec used by
@@ -113,20 +94,13 @@ async def assemble_spec(
11394
service_tag=service_tag,
11495
)
11596
container_name = CONTAINER_NAME
116-
else:
117-
# TODO: need to be sorted out:
118-
# - inject paths mapping
119-
# - remove above # pylint: disable=unused-argument
120-
pass
12197

12298
assert container_name is not None # nosec
12399

124-
_inject_traefik_configuration(
100+
_inject_proxy_network_configuration(
125101
service_spec,
126102
target_container=container_name,
127103
dynamic_sidecar_network_name=dynamic_sidecar_network_name,
128-
simcore_traefik_zone=simcore_traefik_zone,
129-
service_port=service_port,
130104
)
131105

132106
stringified_service_spec = yaml.safe_dump(service_spec)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .spec_dynamic_sidecar import (
2+
MATCH_SERVICE_VERSION,
3+
MATCH_SIMCORE_REGISTRY,
4+
extract_service_port_from_compose_start_spec,
5+
get_dynamic_sidecar_spec,
6+
merge_settings_before_use,
7+
)
8+
from .spec_proxy import get_dynamic_proxy_spec

0 commit comments

Comments
 (0)