From fb12c3311873d9171187fb6e0d7bb92a68295403 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 21 Feb 2020 09:58:49 +0100 Subject: [PATCH 01/68] setting 2 sidecars to reproduce issues that arise in // --- services/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index a1d684eb239..e65d57fcc8d 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -90,6 +90,7 @@ services: mode: replicated replicas: 4 endpoint_mode: dnsrr + replicas: 2 resources: reservations: cpus: "0.1" From 6900437026fb156da9aeca582b17c3076f0c80f9 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 21 Feb 2020 16:39:11 +0100 Subject: [PATCH 02/68] add internal traefik --- services/docker-compose.local.yml | 7 ++++ services/docker-compose.yml | 60 ++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/services/docker-compose.local.yml b/services/docker-compose.local.yml index 5074e2242ab..56ac9ecb0bb 100644 --- a/services/docker-compose.local.yml +++ b/services/docker-compose.local.yml @@ -77,3 +77,10 @@ services: redis: ports: - "6379" + + traefik: + ports: + - target: 80 + published: 80 + - target: 8080 + published: 8080 diff --git a/services/docker-compose.yml b/services/docker-compose.yml index e65d57fcc8d..b95781b5f40 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -79,6 +79,14 @@ services: constraints: - node.platform.os == linux - node.role == manager + labels: + - io.simcore.zone=internal_simcore_stack + - traefik.enable=true + - traefik.docker.network=simcore_default + - traefik.http.services.webserver.loadbalancer.server.port=8080 + - traefik.http.routers.webserver.rule=hostregexp(`{host:.+}`) + - traefik.http.routers.webserver.entrypoints=http + - traefik.http.routers.webserver.middlewares=gzip@docker, sslheader@docker networks: - default - interactive_services_subnet @@ -90,7 +98,7 @@ services: mode: replicated replicas: 4 endpoint_mode: dnsrr - replicas: 2 + replicas: 10 resources: reservations: cpus: "0.1" @@ -209,6 +217,56 @@ services: networks: - default + traefik: + image: traefik:v2.1 + init: true + command: + - '--api=true' + - '--api.dashboard=true' + - '--log.level=INFO' + - '--accesslog=false' + - '--metrics.prometheus=true' + - '--metrics.prometheus.addEntryPointsLabels=true' + - '--metrics.prometheus.addServicesLabels=true' + - '--entryPoints.metrics.address=:8082' + - '--metrics.prometheus.entryPoint=metrics' + - '--entryPoints.http.address=:80' + - '--entryPoints.traefik_dashboard.address=:8080' + - '--providers.docker.endpoint=unix:///var/run/docker.sock' + - '--providers.docker.swarmMode=true' + - '--providers.docker.exposedByDefault=false' + - '--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)' + - '--tracing=true' + - '--tracing.jaeger=true' + - '--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling' + - '--tracing.jaeger.localAgentHostPort=jaeger:6831' + + volumes: + # So that Traefik can listen to the Docker events + - /var/run/docker.sock:/var/run/docker.sock + + deploy: + placement: + constraints: + - node.role == manager + labels: + - io.simcore.zone=internal_simcore_stack + - traefik.enable=true + - traefik.docker.network=simcore_default + # gzip compression + - traefik.http.middlewares.gzip.compress=true + # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. + - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http + # traefik UI + - traefik.http.routers.api.service=api@internal + - traefik.http.routers.api.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + - traefik.http.routers.api.entrypoints=traefik_dashboard + - traefik.http.routers.api.middlewares=gzip@docker + - traefik.http.services.api.loadbalancer.server.port=8080 + networks: + - default + - interactive_services_subnet + volumes: input: {} output: {} From 17b787614bffc44f64f7322d63907e925e1b31dd Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 21 Feb 2020 16:39:24 +0100 Subject: [PATCH 03/68] director sets the labels for traefik --- .../src/simcore_service_director/producer.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 5840ecf2e03..6f20d7ec31c 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -137,7 +137,14 @@ async def _create_docker_service_params(app: web.Application, "uuid": node_uuid, "study_id": project_id, "user_id": user_id, - "type": "main" if main_service else "dependency" + "type": "main" if main_service else "dependency", + "io.simcore.zone": "internal_simcore_stack", + "traefik.enable": "true", + "traefik.docker.network": "simcore_default", + f"traefik.http.services.{node_uuid}.loadbalancer.server.port": 8080, + f"traefik.http.routers.{node_uuid}.rule": f"PathPrefix(`/x/{node_uuid}`)", + f"traefik.http.routers.{node_uuid}.entrypoints": "http", + f"traefik.http.routers.{node_uuid}.middlewares": "gzip@docker, sslheader@docker", }, "networks": [internal_network_id] if internal_network_id else [] } @@ -165,7 +172,7 @@ async def _create_docker_service_params(app: web.Application, # publishing port on the ingress network. elif param["name"] == "ports" and param["type"] == "int": # backward comp - docker_params["labels"]["port"] = str(param["value"]) + docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{node_uuid}.loadbalancer.server.port"] = str(param["value"]) if config.DEBUG_MODE: # special handling for we need to open a port with 0:XXX this tells the docker engine to allocate whatever free port docker_params["endpoint_spec"]["Ports"] = [ @@ -177,7 +184,7 @@ async def _create_docker_service_params(app: web.Application, elif config.DEBUG_MODE and param["type"] == "EndpointSpec": # REST-API compatible if "Ports" in param["value"]: if isinstance(param["value"]["Ports"], list) and "TargetPort" in param["value"]["Ports"][0]: - docker_params["labels"]["port"] = str(param["value"]["Ports"][0]["TargetPort"]) + docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{node_uuid}.loadbalancer.server.port"] = str(param["value"]["Ports"][0]["TargetPort"]) if config.DEBUG_MODE: docker_params["endpoint_spec"] = param["value"] From 3490c3f6c3d4724d134aa559d81bcd28f5fd53db Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 21 Feb 2020 17:36:27 +0100 Subject: [PATCH 04/68] disable webserver reverse proxy --- .../simcore_service_webserver/config/server-docker-dev.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/services/web/server/src/simcore_service_webserver/config/server-docker-dev.yaml b/services/web/server/src/simcore_service_webserver/config/server-docker-dev.yaml index 31f7703fb82..ae6717508c9 100644 --- a/services/web/server/src/simcore_service_webserver/config/server-docker-dev.yaml +++ b/services/web/server/src/simcore_service_webserver/config/server-docker-dev.yaml @@ -21,6 +21,10 @@ catalog: host: catalog port: 8000 version: v0 +application_proxy: + enabled: False +reverse_proxy: + enabled: False db: init_tables: True postgres: From eca5212e70abefe7790ccb604e186eba81218233 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 11:48:16 +0100 Subject: [PATCH 05/68] better naming for traefik routers --- .../src/simcore_service_director/producer.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 6f20d7ec31c..b0286cef33e 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -85,7 +85,7 @@ async def _create_docker_service_params(app: web.Application, internal_network_id: Optional[str]) -> Dict: service_parameters_labels = await _read_service_settings(app, service_key, service_tag) - + service_name = registry_proxy.get_service_last_names(service_key) + "_" + node_uuid log.debug("Converting labels to docker runtime parameters") container_spec = { "Image": "{}/{}:{}".format(config.REGISTRY_URL, service_key, service_tag), @@ -95,7 +95,7 @@ async def _create_docker_service_params(app: web.Application, "SIMCORE_NODE_UUID": node_uuid, "SIMCORE_PROJECT_ID": project_id, "SIMCORE_NODE_BASEPATH": node_base_path or "", - "SIMCORE_HOST_NAME": registry_proxy.get_service_last_names(service_key) + "_" + node_uuid + "SIMCORE_HOST_NAME": service_name }, "Hosts": get_system_extra_hosts_raw(config.EXTRA_HOSTS_SUFFIX), "Init": True, @@ -105,10 +105,11 @@ async def _create_docker_service_params(app: web.Application, "node_id": node_uuid } } + docker_params = { "auth": await _create_auth() if config.REGISTRY_AUTH else {}, "registry": config.REGISTRY_URL if config.REGISTRY_AUTH else "", - "name": registry_proxy.get_service_last_names(service_key) + "_" + node_uuid, + "name": service_name, "task_template": { "ContainerSpec": container_spec, "Placement": { @@ -141,10 +142,10 @@ async def _create_docker_service_params(app: web.Application, "io.simcore.zone": "internal_simcore_stack", "traefik.enable": "true", "traefik.docker.network": "simcore_default", - f"traefik.http.services.{node_uuid}.loadbalancer.server.port": 8080, - f"traefik.http.routers.{node_uuid}.rule": f"PathPrefix(`/x/{node_uuid}`)", - f"traefik.http.routers.{node_uuid}.entrypoints": "http", - f"traefik.http.routers.{node_uuid}.middlewares": "gzip@docker, sslheader@docker", + f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, + f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", + f"traefik.http.routers.{service_name}.entrypoints": "http", + f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker, sslheader@docker", }, "networks": [internal_network_id] if internal_network_id else [] } @@ -172,7 +173,7 @@ async def _create_docker_service_params(app: web.Application, # publishing port on the ingress network. elif param["name"] == "ports" and param["type"] == "int": # backward comp - docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{node_uuid}.loadbalancer.server.port"] = str(param["value"]) + docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{service_name}.loadbalancer.server.port"] = str(param["value"]) if config.DEBUG_MODE: # special handling for we need to open a port with 0:XXX this tells the docker engine to allocate whatever free port docker_params["endpoint_spec"]["Ports"] = [ @@ -184,7 +185,7 @@ async def _create_docker_service_params(app: web.Application, elif config.DEBUG_MODE and param["type"] == "EndpointSpec": # REST-API compatible if "Ports" in param["value"]: if isinstance(param["value"]["Ports"], list) and "TargetPort" in param["value"]["Ports"][0]: - docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{node_uuid}.loadbalancer.server.port"] = str(param["value"]["Ports"][0]["TargetPort"]) + docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{service_name}.loadbalancer.server.port"] = str(param["value"]["Ports"][0]["TargetPort"]) if config.DEBUG_MODE: docker_params["endpoint_spec"] = param["value"] From f8eb431cc36fff2a1109961b897350153f1cb9d3 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 11:48:45 +0100 Subject: [PATCH 06/68] 3d-viewer hack for now --- .../director/src/simcore_service_director/producer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index b0286cef33e..7cff0f8881c 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -149,6 +149,12 @@ async def _create_docker_service_params(app: web.Application, }, "networks": [internal_network_id] if internal_network_id else [] } + if "3d-viewer" in service_name: + # HACK: Paraview visualizer needs a strip prefix here, this should be removed once dy-sidecar is in or that + # all dynamic services are converted to using traefik as reverse proxy instead of webserver + docker_params["labels"][f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex"] = f"^/x/{node_uuid}" + docker_params["labels"][f"traefik.http.routers.{service_name}.middlewares"] += f", {service_name}_stripprefixregex" + for param in service_parameters_labels: _check_setting_correctness(param) # replace %service_uuid% by the given uuid @@ -670,7 +676,7 @@ async def stop_service(app: web.Application, node_uuid: str) -> None: service_details = await get_service_details(app, node_uuid) service_host_name = "{}:{}{}".format(service_details["service_host"], service_details["service_port"] if service_details["service_port"] else "80", - service_details["service_basepath"]) + service_details["service_basepath"] if not "3d-viewer" in service_details["service_host"] else "") log.debug("saving state of service %s...", service_host_name) try: session = app[APP_CLIENT_SESSION_KEY] From 8d0d14d75e575ee20eb4e7b6815afd7cd0e324cf Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:18:52 +0100 Subject: [PATCH 07/68] internal traefik api name conflict --- services/docker-compose.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index b95781b5f40..0fa2ea0cde6 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -258,11 +258,11 @@ services: # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http # traefik UI - - traefik.http.routers.api.service=api@internal - - traefik.http.routers.api.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) - - traefik.http.routers.api.entrypoints=traefik_dashboard - - traefik.http.routers.api.middlewares=gzip@docker - - traefik.http.services.api.loadbalancer.server.port=8080 + - traefik.http.routers.api_internal.service=api@internal + - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + - traefik.http.routers.api_internal.entrypoints=traefik_dashboard + - traefik.http.routers.api_internal.middlewares=gzip@docker + - traefik.http.services.api_internal.loadbalancer.server.port=8080 networks: - default - interactive_services_subnet From e6846dea90245d98d4a22cf57074fbcfca8ed7dc Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:42:29 +0100 Subject: [PATCH 08/68] disabled internal api --- services/docker-compose.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 0fa2ea0cde6..19d801786be 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -250,19 +250,19 @@ services: constraints: - node.role == manager labels: - - io.simcore.zone=internal_simcore_stack - - traefik.enable=true - - traefik.docker.network=simcore_default + # - io.simcore.zone=internal_simcore_stack + # - traefik.enable=true + # - traefik.docker.network=simcore_default # gzip compression - - traefik.http.middlewares.gzip.compress=true + # - traefik.http.middlewares.gzip.compress=true # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http + # - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http # traefik UI - - traefik.http.routers.api_internal.service=api@internal - - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) - - traefik.http.routers.api_internal.entrypoints=traefik_dashboard - - traefik.http.routers.api_internal.middlewares=gzip@docker - - traefik.http.services.api_internal.loadbalancer.server.port=8080 + # - traefik.http.routers.api_internal.service=api@internal + # - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + # - traefik.http.routers.api_internal.entrypoints=traefik_dashboard + # - traefik.http.routers.api_internal.middlewares=gzip@docker + # - traefik.http.services.api_internal.loadbalancer.server.port=8080 networks: - default - interactive_services_subnet From fd2d37fe3aef06c8bf54bdd3ee69b6626c4b61e0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:44:23 +0100 Subject: [PATCH 09/68] typo --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 19d801786be..aeebbc14f70 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -249,7 +249,7 @@ services: placement: constraints: - node.role == manager - labels: + # labels: # - io.simcore.zone=internal_simcore_stack # - traefik.enable=true # - traefik.docker.network=simcore_default From c1fa8f9892578a36ad03ff50426551e2a6ec0c47 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:52:06 +0100 Subject: [PATCH 10/68] moved creation of middlewares in internal traefik --- services/docker-compose.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index aeebbc14f70..0ab5ea08a3f 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -81,6 +81,10 @@ services: - node.role == manager labels: - io.simcore.zone=internal_simcore_stack + # gzip compression + - traefik.http.middlewares.gzip.compress=true + # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. + - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http - traefik.enable=true - traefik.docker.network=simcore_default - traefik.http.services.webserver.loadbalancer.server.port=8080 @@ -249,14 +253,11 @@ services: placement: constraints: - node.role == manager - # labels: + labels: # - io.simcore.zone=internal_simcore_stack # - traefik.enable=true # - traefik.docker.network=simcore_default - # gzip compression - # - traefik.http.middlewares.gzip.compress=true - # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - # - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http + # traefik UI # - traefik.http.routers.api_internal.service=api@internal # - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) From 69cc3734cc378f2e089e155a900f16108b7ecf72 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 15:53:52 +0100 Subject: [PATCH 11/68] typo --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 0ab5ea08a3f..bcf66428970 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -253,7 +253,7 @@ services: placement: constraints: - node.role == manager - labels: + # labels: # - io.simcore.zone=internal_simcore_stack # - traefik.enable=true # - traefik.docker.network=simcore_default From 9af5acb0239198b72bbb69ef2e3858115e0f688f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 16:22:53 +0100 Subject: [PATCH 12/68] add internal ssl headers for socket.io --- services/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index bcf66428970..efd02a9e55c 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -84,13 +84,13 @@ services: # gzip compression - traefik.http.middlewares.gzip.compress=true # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - - traefik.http.middlewares.sslheader.headers.customrequestheaders.X-Forwarded-Proto=http + - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=http - traefik.enable=true - traefik.docker.network=simcore_default - traefik.http.services.webserver.loadbalancer.server.port=8080 - traefik.http.routers.webserver.rule=hostregexp(`{host:.+}`) - traefik.http.routers.webserver.entrypoints=http - - traefik.http.routers.webserver.middlewares=gzip@docker, sslheader@docker + - traefik.http.routers.webserver.middlewares=gzip@docker, simcore_sslheader@docker networks: - default - interactive_services_subnet From fbe938c4456b5236ae393e4123488944126e217b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 16:28:16 +0100 Subject: [PATCH 13/68] testing --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index efd02a9e55c..a9ce3aa6035 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -84,7 +84,7 @@ services: # gzip compression - traefik.http.middlewares.gzip.compress=true # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=http + - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=https - traefik.enable=true - traefik.docker.network=simcore_default - traefik.http.services.webserver.loadbalancer.server.port=8080 From 396cee70f2b44325dfd931881c127b0168878bff Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 16:38:12 +0100 Subject: [PATCH 14/68] default to http for local deployment --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index a9ce3aa6035..efd02a9e55c 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -84,7 +84,7 @@ services: # gzip compression - traefik.http.middlewares.gzip.compress=true # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=https + - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=http - traefik.enable=true - traefik.docker.network=simcore_default - traefik.http.services.webserver.loadbalancer.server.port=8080 From d15a22cd24d20d6d1743b8f0c942875d7c2185dd Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 18:17:18 +0100 Subject: [PATCH 15/68] removed sslheader middleware --- services/director/src/simcore_service_director/producer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 7cff0f8881c..bc62e3446be 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -145,7 +145,7 @@ async def _create_docker_service_params(app: web.Application, f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", f"traefik.http.routers.{service_name}.entrypoints": "http", - f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker, sslheader@docker", + f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker", }, "networks": [internal_network_id] if internal_network_id else [] } From 0a462537902cca12425b9b22a50a88687b0e4f4c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 25 Feb 2020 18:37:00 +0100 Subject: [PATCH 16/68] setup default network for internal traefik --- services/director/src/simcore_service_director/producer.py | 1 - services/docker-compose.yml | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index bc62e3446be..b18a9684c75 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -141,7 +141,6 @@ async def _create_docker_service_params(app: web.Application, "type": "main" if main_service else "dependency", "io.simcore.zone": "internal_simcore_stack", "traefik.enable": "true", - "traefik.docker.network": "simcore_default", f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", f"traefik.http.routers.{service_name}.entrypoints": "http", diff --git a/services/docker-compose.yml b/services/docker-compose.yml index efd02a9e55c..c8bdb3b168e 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -86,7 +86,6 @@ services: # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=http - traefik.enable=true - - traefik.docker.network=simcore_default - traefik.http.services.webserver.loadbalancer.server.port=8080 - traefik.http.routers.webserver.rule=hostregexp(`{host:.+}`) - traefik.http.routers.webserver.entrypoints=http @@ -238,6 +237,7 @@ services: - '--entryPoints.traefik_dashboard.address=:8080' - '--providers.docker.endpoint=unix:///var/run/docker.sock' - '--providers.docker.swarmMode=true' + - '--providers.docker.network=simcore_default' - '--providers.docker.exposedByDefault=false' - '--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)' - '--tracing=true' @@ -256,7 +256,6 @@ services: # labels: # - io.simcore.zone=internal_simcore_stack # - traefik.enable=true - # - traefik.docker.network=simcore_default # traefik UI # - traefik.http.routers.api_internal.service=api@internal From f488778ae6c556d6900ba64fae666c0d7d34e1cb Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 26 Feb 2020 00:11:02 +0100 Subject: [PATCH 17/68] frontend to wait till traefik completed redirection --- .../source/class/osparc/data/model/Node.js | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/services/web/client/source/class/osparc/data/model/Node.js b/services/web/client/source/class/osparc/data/model/Node.js index 01495d49a2d..19b4a102e20 100644 --- a/services/web/client/source/class/osparc/data/model/Node.js +++ b/services/web/client/source/class/osparc/data/model/Node.js @@ -954,7 +954,7 @@ qx.Class.define("osparc.data.model.Node", { if (servicePath) { const entryPoint = entryPointD ? ("/" + entryPointD) : "/"; const srvUrl = servicePath + entryPoint; - this.__serviceReadyIn(srvUrl); + this.__wait_for_service_ready(srvUrl); } break; } @@ -1019,7 +1019,21 @@ qx.Class.define("osparc.data.model.Node", { this.__nodeState(); }, - + __wait_for_service_ready: function(srvUrl) { + // ping for some time until it is really ready + const pingRequest = new qx.io.request.Xhr(srvUrl); + pingRequest.addListenerOnce("success", function(e) { + console.log("service ready now"); + this.__serviceReadyIn(srvUrl); + }, this); + pingRequest.addListenerOnce("fail", e => { + const error = e.getTarget().getResponse(); + console.log("service not ready yet, waiting... " + error); + const interval = 1000; + qx.event.Timer.once(() => this.__wait_for_service_ready(srvUrl), this, interval); + }); + pingRequest.send(); + }, __serviceReadyIn: function(srvUrl) { this.setServiceUrl(srvUrl); this.setInteractiveStatus("ready"); @@ -1034,7 +1048,9 @@ qx.Class.define("osparc.data.model.Node", { this.setProgress(100); // FIXME: Apparently no all services are inmediately ready when they publish the port - const waitFor = 4000; + // ping the service until it is accessible through the platform + + const waitFor = 500; qx.event.Timer.once(ev => { this.__restartIFrame(); }, this, waitFor); From ce90cef307959c11a4799c58e9ea5a2a3e321e35 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 26 Feb 2020 00:14:57 +0100 Subject: [PATCH 18/68] doc --- services/director/src/simcore_service_director/producer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index b18a9684c75..ea58416ef6a 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -149,7 +149,8 @@ async def _create_docker_service_params(app: web.Application, "networks": [internal_network_id] if internal_network_id else [] } if "3d-viewer" in service_name: - # HACK: Paraview visualizer needs a strip prefix here, this should be removed once dy-sidecar is in or that + # FIXME: the exception for the 3d-viewer shall be removed once the dy-sidecar comes in + # Paraview visualizer needs a strip prefix here, this should be removed once dy-sidecar is in or that # all dynamic services are converted to using traefik as reverse proxy instead of webserver docker_params["labels"][f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex"] = f"^/x/{node_uuid}" docker_params["labels"][f"traefik.http.routers.{service_name}.middlewares"] += f", {service_name}_stripprefixregex" @@ -673,6 +674,7 @@ async def stop_service(app: web.Application, node_uuid: str) -> None: log.debug("found service(s) with uuid %s", list_running_services_with_uuid) # save the state of the main service if it can service_details = await get_service_details(app, node_uuid) + # FIXME: the exception for the 3d-viewer shall be removed once the dy-sidecar comes in service_host_name = "{}:{}{}".format(service_details["service_host"], service_details["service_port"] if service_details["service_port"] else "80", service_details["service_basepath"] if not "3d-viewer" in service_details["service_host"] else "") From 86be3de24c91d8d309b5ee2a0e359c087bbef23d Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 26 Feb 2020 00:15:09 +0100 Subject: [PATCH 19/68] number of sidecars locally to 8 --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index c8bdb3b168e..9a4bf69d101 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -101,7 +101,7 @@ services: mode: replicated replicas: 4 endpoint_mode: dnsrr - replicas: 10 + replicas: 8 resources: reservations: cpus: "0.1" From c87879593040c26433128220ef6edf691835f5c7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 26 Feb 2020 00:28:35 +0100 Subject: [PATCH 20/68] camelcase --- services/web/client/source/class/osparc/data/model/Node.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/web/client/source/class/osparc/data/model/Node.js b/services/web/client/source/class/osparc/data/model/Node.js index 19b4a102e20..ea8ca97cfc4 100644 --- a/services/web/client/source/class/osparc/data/model/Node.js +++ b/services/web/client/source/class/osparc/data/model/Node.js @@ -954,7 +954,7 @@ qx.Class.define("osparc.data.model.Node", { if (servicePath) { const entryPoint = entryPointD ? ("/" + entryPointD) : "/"; const srvUrl = servicePath + entryPoint; - this.__wait_for_service_ready(srvUrl); + this.__waitForServiceReady(srvUrl); } break; } @@ -1019,7 +1019,7 @@ qx.Class.define("osparc.data.model.Node", { this.__nodeState(); }, - __wait_for_service_ready: function(srvUrl) { + __waitForServiceReady: function(srvUrl) { // ping for some time until it is really ready const pingRequest = new qx.io.request.Xhr(srvUrl); pingRequest.addListenerOnce("success", function(e) { @@ -1030,7 +1030,7 @@ qx.Class.define("osparc.data.model.Node", { const error = e.getTarget().getResponse(); console.log("service not ready yet, waiting... " + error); const interval = 1000; - qx.event.Timer.once(() => this.__wait_for_service_ready(srvUrl), this, interval); + qx.event.Timer.once(() => this.__waitForServiceReady(srvUrl), this, interval); }); pingRequest.send(); }, From 1306cb31c40633d45251a7cba8b788b9ff0c6a26 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 26 Feb 2020 08:10:08 +0100 Subject: [PATCH 21/68] added fixmes to be fixed --- services/director/src/simcore_service_director/producer.py | 1 + services/docker-compose.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index ea58416ef6a..013b2f2e5a7 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -139,6 +139,7 @@ async def _create_docker_service_params(app: web.Application, "study_id": project_id, "user_id": user_id, "type": "main" if main_service else "dependency", + #FIXME: the zone must be prefixed with the stack name!!! "io.simcore.zone": "internal_simcore_stack", "traefik.enable": "true", f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 9a4bf69d101..3aaae131bc6 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -239,6 +239,7 @@ services: - '--providers.docker.swarmMode=true' - '--providers.docker.network=simcore_default' - '--providers.docker.exposedByDefault=false' + #FIXME: the zone must be prefixed with the stack name!!! - '--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)' - '--tracing=true' - '--tracing.jaeger=true' From 8bfb5e37ab399f19cb2dc69efb0e6870a442d905 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 18 Mar 2020 18:01:47 +0100 Subject: [PATCH 22/68] fix bad merge --- services/docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 3aaae131bc6..597d70f1081 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -99,9 +99,8 @@ services: init: true deploy: mode: replicated - replicas: 4 - endpoint_mode: dnsrr replicas: 8 + endpoint_mode: dnsrr resources: reservations: cpus: "0.1" From 3f4b8e956ec1228117186d1cb522d7e6b9512abe Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 11:04:46 +0200 Subject: [PATCH 23/68] update internal traefik to 2.2 --- services/docker-compose.yml | 63 ++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 597d70f1081..454d88599b9 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -100,7 +100,7 @@ services: deploy: mode: replicated replicas: 8 - endpoint_mode: dnsrr + endpoint_mode: dnsrr resources: reservations: cpus: "0.1" @@ -220,30 +220,30 @@ services: - default traefik: - image: traefik:v2.1 + image: traefik:v2.2 init: true command: - - '--api=true' - - '--api.dashboard=true' - - '--log.level=INFO' - - '--accesslog=false' - - '--metrics.prometheus=true' - - '--metrics.prometheus.addEntryPointsLabels=true' - - '--metrics.prometheus.addServicesLabels=true' - - '--entryPoints.metrics.address=:8082' - - '--metrics.prometheus.entryPoint=metrics' - - '--entryPoints.http.address=:80' - - '--entryPoints.traefik_dashboard.address=:8080' - - '--providers.docker.endpoint=unix:///var/run/docker.sock' - - '--providers.docker.swarmMode=true' - - '--providers.docker.network=simcore_default' - - '--providers.docker.exposedByDefault=false' - #FIXME: the zone must be prefixed with the stack name!!! - - '--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)' - - '--tracing=true' - - '--tracing.jaeger=true' - - '--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling' - - '--tracing.jaeger.localAgentHostPort=jaeger:6831' + - "--api=true" + - "--api.dashboard=true" + - "--log.level=INFO" + - "--accesslog=false" + - "--metrics.prometheus=true" + - "--metrics.prometheus.addEntryPointsLabels=true" + - "--metrics.prometheus.addServicesLabels=true" + - "--entryPoints.metrics.address=:8082" + - "--metrics.prometheus.entryPoint=metrics" + - "--entryPoints.http.address=:80" + - "--entryPoints.traefik_dashboard.address=:8080" + - "--providers.docker.endpoint=unix:///var/run/docker.sock" + - "--providers.docker.swarmMode=true" + - "--providers.docker.network=simcore_default" + - "--providers.docker.exposedByDefault=false" + #FIXME: the zone must be prefixed with the stack name!!! + - "--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)" + - "--tracing=true" + - "--tracing.jaeger=true" + - "--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling" + - "--tracing.jaeger.localAgentHostPort=jaeger:6831" volumes: # So that Traefik can listen to the Docker events @@ -254,15 +254,14 @@ services: constraints: - node.role == manager # labels: - # - io.simcore.zone=internal_simcore_stack - # - traefik.enable=true - - # traefik UI - # - traefik.http.routers.api_internal.service=api@internal - # - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) - # - traefik.http.routers.api_internal.entrypoints=traefik_dashboard - # - traefik.http.routers.api_internal.middlewares=gzip@docker - # - traefik.http.services.api_internal.loadbalancer.server.port=8080 + # - io.simcore.zone=internal_simcore_stack + # - traefik.enable=true + # traefik UI + # - traefik.http.routers.api_internal.service=api@internal + # - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + # - traefik.http.routers.api_internal.entrypoints=traefik_dashboard + # - traefik.http.routers.api_internal.middlewares=gzip@docker + # - traefik.http.services.api_internal.loadbalancer.server.port=8080 networks: - default - interactive_services_subnet From 426947ebe091ca03e2fec74e843f63d29b6703d1 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:50:04 +0200 Subject: [PATCH 24/68] add traefik label for network black --- .../src/simcore_service_director/producer.py | 606 ++++++++++++------ 1 file changed, 395 insertions(+), 211 deletions(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 013b2f2e5a7..8597c919ac2 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -16,11 +16,12 @@ from .config import APP_CLIENT_SESSION_KEY from .system_utils import get_system_extra_hosts_raw -SERVICE_RUNTIME_SETTINGS: str = 'simcore.service.settings' -SERVICE_RUNTIME_BOOTSETTINGS: str = 'simcore.service.bootsettings' +SERVICE_RUNTIME_SETTINGS: str = "simcore.service.settings" +SERVICE_RUNTIME_BOOTSETTINGS: str = "simcore.service.bootsettings" log = logging.getLogger(__name__) + class ServiceState(Enum): PENDING = "pending" PULLING = "pulling" @@ -31,60 +32,76 @@ class ServiceState(Enum): async def _create_auth() -> Dict[str, str]: - return { - "username": config.REGISTRY_USER, - "password": config.REGISTRY_PW - } + return {"username": config.REGISTRY_USER, "password": config.REGISTRY_PW} -async def _check_node_uuid_available(client: aiodocker.docker.Docker, node_uuid: str) -> None: + +async def _check_node_uuid_available( + client: aiodocker.docker.Docker, node_uuid: str +) -> None: log.debug("Checked if UUID %s is already in use", node_uuid) # check if service with same uuid already exists try: list_of_running_services_w_uuid = await client.services.list( - filters={'label': 'uuid=' + node_uuid}) + filters={"label": "uuid=" + node_uuid} + ) except aiodocker.exceptions.DockerError as err: log.exception("Error while retrieving services list") raise exceptions.GenericDockerError( - "Error while retrieving services", err) from err + "Error while retrieving services", err + ) from err if list_of_running_services_w_uuid: raise exceptions.ServiceUUIDInUseError(node_uuid) log.debug("UUID %s is free", node_uuid) def _check_setting_correctness(setting: Dict) -> None: - if 'name' not in setting or 'type' not in setting or 'value' not in setting: + if "name" not in setting or "type" not in setting or "value" not in setting: raise exceptions.DirectorException("Invalid setting in %s" % setting) async def _read_service_settings(app: web.Application, key: str, tag: str) -> Dict: # pylint: disable=C0103 image_labels = await registry_proxy.get_image_labels(app, key, tag) - runtime_parameters = json.loads(image_labels[SERVICE_RUNTIME_SETTINGS]) if SERVICE_RUNTIME_SETTINGS in image_labels else {} + runtime_parameters = ( + json.loads(image_labels[SERVICE_RUNTIME_SETTINGS]) + if SERVICE_RUNTIME_SETTINGS in image_labels + else {} + ) log.debug("Retrieved service runtime settings: %s", runtime_parameters) return runtime_parameters -async def _get_service_boot_parameters_labels(app: web.Application, key: str, tag: str) -> Dict: +async def _get_service_boot_parameters_labels( + app: web.Application, key: str, tag: str +) -> Dict: # pylint: disable=C0103 image_labels = await registry_proxy.get_image_labels(app, key, tag) - boot_params = json.loads(image_labels[SERVICE_RUNTIME_BOOTSETTINGS]) if SERVICE_RUNTIME_BOOTSETTINGS in image_labels else {} + boot_params = ( + json.loads(image_labels[SERVICE_RUNTIME_BOOTSETTINGS]) + if SERVICE_RUNTIME_BOOTSETTINGS in image_labels + else {} + ) log.debug("Retrieved service boot settings: %s", boot_params) return boot_params # pylint: disable=too-many-branches -async def _create_docker_service_params(app: web.Application, - client: aiodocker.docker.Docker, - service_key: str, - service_tag: str, - main_service: bool, - user_id: str, - node_uuid: str, - project_id: str, - node_base_path: str, - internal_network_id: Optional[str]) -> Dict: - - service_parameters_labels = await _read_service_settings(app, service_key, service_tag) +async def _create_docker_service_params( + app: web.Application, + client: aiodocker.docker.Docker, + service_key: str, + service_tag: str, + main_service: bool, + user_id: str, + node_uuid: str, + project_id: str, + node_base_path: str, + internal_network_id: Optional[str], +) -> Dict: + + service_parameters_labels = await _read_service_settings( + app, service_key, service_tag + ) service_name = registry_proxy.get_service_last_names(service_key) + "_" + node_uuid log.debug("Converting labels to docker runtime parameters") container_spec = { @@ -95,15 +112,11 @@ async def _create_docker_service_params(app: web.Application, "SIMCORE_NODE_UUID": node_uuid, "SIMCORE_PROJECT_ID": project_id, "SIMCORE_NODE_BASEPATH": node_base_path or "", - "SIMCORE_HOST_NAME": service_name + "SIMCORE_HOST_NAME": service_name, }, "Hosts": get_system_extra_hosts_raw(config.EXTRA_HOSTS_SUFFIX), "Init": True, - "Labels": { - "user_id": user_id, - "study_id": project_id, - "node_id": node_uuid - } + "Labels": {"user_id": user_id, "study_id": project_id, "node_id": node_uuid}, } docker_params = { @@ -113,33 +126,30 @@ async def _create_docker_service_params(app: web.Application, "task_template": { "ContainerSpec": container_spec, "Placement": { - "Constraints": ["node.role==worker"] if await docker_utils.swarm_has_worker_nodes() else [] + "Constraints": ["node.role==worker"] + if await docker_utils.swarm_has_worker_nodes() + else [] }, "RestartPolicy": { "Condition": "on-failure", "Delay": 5000000, - "MaxAttempts": 2 + "MaxAttempts": 2, }, "Resources": { - "Limits": { - "NanoCPUs": 2 * pow(10, 9), - "MemoryBytes": 1 * pow(1024, 3) - }, + "Limits": {"NanoCPUs": 2 * pow(10, 9), "MemoryBytes": 1 * pow(1024, 3)}, "Reservations": { "NanoCPUs": 1 * pow(10, 8), - "MemoryBytes": 500 * pow(1024, 2) - } - } - }, - "endpoint_spec": { - "Mode": "dnsrr" + "MemoryBytes": 500 * pow(1024, 2), + }, + }, }, + "endpoint_spec": {"Mode": "dnsrr"}, "labels": { "uuid": node_uuid, "study_id": project_id, "user_id": user_id, "type": "main" if main_service else "dependency", - #FIXME: the zone must be prefixed with the stack name!!! + # FIXME: the zone must be prefixed with the stack name!!! "io.simcore.zone": "internal_simcore_stack", "traefik.enable": "true", f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, @@ -147,73 +157,99 @@ async def _create_docker_service_params(app: web.Application, f"traefik.http.routers.{service_name}.entrypoints": "http", f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker", }, - "networks": [internal_network_id] if internal_network_id else [] + "networks": [internal_network_id] if internal_network_id else [], } if "3d-viewer" in service_name: # FIXME: the exception for the 3d-viewer shall be removed once the dy-sidecar comes in # Paraview visualizer needs a strip prefix here, this should be removed once dy-sidecar is in or that # all dynamic services are converted to using traefik as reverse proxy instead of webserver - docker_params["labels"][f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex"] = f"^/x/{node_uuid}" - docker_params["labels"][f"traefik.http.routers.{service_name}.middlewares"] += f", {service_name}_stripprefixregex" + docker_params["labels"][ + f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex" + ] = f"^/x/{node_uuid}" + docker_params["labels"][ + f"traefik.http.routers.{service_name}.middlewares" + ] += f", {service_name}_stripprefixregex" for param in service_parameters_labels: _check_setting_correctness(param) # replace %service_uuid% by the given uuid - if str(param['value']).find("%service_uuid%") != -1: - dummy_string = json.dumps(param['value']) + if str(param["value"]).find("%service_uuid%") != -1: + dummy_string = json.dumps(param["value"]) dummy_string = dummy_string.replace("%service_uuid%", node_uuid) - param['value'] = json.loads(dummy_string) + param["value"] = json.loads(dummy_string) if param["type"] == "Resources": # python-API compatible for backward compatibility if "mem_limit" in param["value"]: - docker_params["task_template"]["Resources"]["Limits"]["MemoryBytes"] = param["value"]["mem_limit"] + docker_params["task_template"]["Resources"]["Limits"][ + "MemoryBytes" + ] = param["value"]["mem_limit"] if "cpu_limit" in param["value"]: - docker_params["task_template"]["Resources"]["Limits"]["NanoCPUs"] = param["value"]["cpu_limit"] + docker_params["task_template"]["Resources"]["Limits"][ + "NanoCPUs" + ] = param["value"]["cpu_limit"] if "mem_reservation" in param["value"]: - docker_params["task_template"]["Resources"]["Reservations"]["MemoryBytes"] = param["value"]["mem_reservation"] + docker_params["task_template"]["Resources"]["Reservations"][ + "MemoryBytes" + ] = param["value"]["mem_reservation"] if "cpu_reservation" in param["value"]: - docker_params["task_template"]["Resources"]["Reservations"]["NanoCPUs"] = param["value"]["cpu_reservation"] + docker_params["task_template"]["Resources"]["Reservations"][ + "NanoCPUs" + ] = param["value"]["cpu_reservation"] # REST-API compatible if "Limits" in param["value"] or "Reservations" in param["value"]: docker_params["task_template"]["Resources"].update(param["value"]) # publishing port on the ingress network. - elif param["name"] == "ports" and param["type"] == "int": # backward comp - docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{service_name}.loadbalancer.server.port"] = str(param["value"]) + elif param["name"] == "ports" and param["type"] == "int": # backward comp + docker_params["labels"]["port"] = docker_params["labels"][ + f"traefik.http.services.{service_name}.loadbalancer.server.port" + ] = str(param["value"]) if config.DEBUG_MODE: # special handling for we need to open a port with 0:XXX this tells the docker engine to allocate whatever free port docker_params["endpoint_spec"]["Ports"] = [ - { - "TargetPort": int(param["value"]), - "PublishedPort": 0 - } - ] - elif config.DEBUG_MODE and param["type"] == "EndpointSpec": # REST-API compatible + {"TargetPort": int(param["value"]), "PublishedPort": 0} + ] + elif ( + config.DEBUG_MODE and param["type"] == "EndpointSpec" + ): # REST-API compatible if "Ports" in param["value"]: - if isinstance(param["value"]["Ports"], list) and "TargetPort" in param["value"]["Ports"][0]: - docker_params["labels"]["port"] = docker_params["labels"][f"traefik.http.services.{service_name}.loadbalancer.server.port"] = str(param["value"]["Ports"][0]["TargetPort"]) + if ( + isinstance(param["value"]["Ports"], list) + and "TargetPort" in param["value"]["Ports"][0] + ): + docker_params["labels"]["port"] = docker_params["labels"][ + f"traefik.http.services.{service_name}.loadbalancer.server.port" + ] = str(param["value"]["Ports"][0]["TargetPort"]) if config.DEBUG_MODE: docker_params["endpoint_spec"] = param["value"] # placement constraints - elif param["name"] == "constraints": # python-API compatible + elif param["name"] == "constraints": # python-API compatible docker_params["task_template"]["Placement"]["Constraints"] += param["value"] - elif param["type"] == "Constraints": # REST-API compatible + elif param["type"] == "Constraints": # REST-API compatible docker_params["task_template"]["Placement"]["Constraints"] += param["value"] # attach the service to the swarm network dedicated to services try: - swarm_network_id = (await _get_swarm_network(client))["Id"] + swarm_network = await _get_swarm_network(client) + swarm_network_id = swarm_network["Id"] + swarm_network_name = swarm_network["Name"] docker_params["networks"].append(swarm_network_id) + docker_params["labels"][f"traefik.docker.network"] = swarm_network_name + except exceptions.DirectorException: log.exception("Could not find swarm network") log.debug("Converted labels to docker runtime parameters: %s", docker_params) # set labels for CPU and Memory limits - container_spec["Labels"]["nano_cpus_limit"] = str(docker_params["task_template"]["Resources"]["Limits"]["NanoCPUs"]) - container_spec["Labels"]["mem_limit"] = str(docker_params["task_template"]["Resources"]["Limits"]["MemoryBytes"]) + container_spec["Labels"]["nano_cpus_limit"] = str( + docker_params["task_template"]["Resources"]["Limits"]["NanoCPUs"] + ) + container_spec["Labels"]["mem_limit"] = str( + docker_params["task_template"]["Resources"]["Limits"]["MemoryBytes"] + ) return docker_params @@ -222,33 +258,44 @@ def _get_service_entrypoint(service_boot_parameters_labels: Dict) -> str: log.debug("Getting service entrypoint") for param in service_boot_parameters_labels: _check_setting_correctness(param) - if param['name'] == 'entry_point': - log.debug("Service entrypoint is %s", param['value']) - return param['value'] - return '' + if param["name"] == "entry_point": + log.debug("Service entrypoint is %s", param["value"]) + return param["value"] + return "" + async def _get_swarm_network(client: aiodocker.docker.Docker) -> Dict: network_name = "_default" if config.SIMCORE_SERVICES_NETWORK_NAME: network_name = "{}".format(config.SIMCORE_SERVICES_NETWORK_NAME) # try to find the network name (usually named STACKNAME_default) - networks = [x for x in (await client.networks.list()) if "swarm" in x["Scope"] and network_name in x["Name"]] + networks = [ + x + for x in (await client.networks.list()) + if "swarm" in x["Scope"] and network_name in x["Name"] + ] if not networks or len(networks) > 1: raise exceptions.DirectorException( - msg="Swarm network name is not configured, found following networks: {}".format(networks)) + msg="Swarm network name is not configured, found following networks: {}".format( + networks + ) + ) return networks[0] -async def _get_docker_image_port_mapping(service: Dict) -> Tuple[Optional[str], Optional[int]]: + +async def _get_docker_image_port_mapping( + service: Dict, +) -> Tuple[Optional[str], Optional[int]]: log.debug("getting port published by service: %s", service) published_ports = list() target_ports = list() - if 'Endpoint' in service: - service_endpoints = service['Endpoint'] - if 'Ports' in service_endpoints: - ports_info_json = service_endpoints['Ports'] + if "Endpoint" in service: + service_endpoints = service["Endpoint"] + if "Ports" in service_endpoints: + ports_info_json = service_endpoints["Ports"] for port in ports_info_json: - published_ports.append(port['PublishedPort']) + published_ports.append(port["PublishedPort"]) target_ports.append(port["TargetPort"]) log.debug("Service %s publishes: %s ports", service["ID"], published_ports) @@ -265,24 +312,33 @@ async def _get_docker_image_port_mapping(service: Dict) -> Tuple[Optional[str], return published_port, target_port -@tenacity.retry(wait=tenacity.wait_fixed(2), - stop=tenacity.stop_after_attempt(3) or tenacity.stop_after_delay(10)) -async def _pass_port_to_service(service_name: str, - port: str, - service_boot_parameters_labels: Dict, - session: ClientSession) -> None: +@tenacity.retry( + wait=tenacity.wait_fixed(2), + stop=tenacity.stop_after_attempt(3) or tenacity.stop_after_delay(10), +) +async def _pass_port_to_service( + service_name: str, + port: str, + service_boot_parameters_labels: Dict, + session: ClientSession, +) -> None: for param in service_boot_parameters_labels: _check_setting_correctness(param) - if param['name'] == 'published_host': + if param["name"] == "published_host": # time.sleep(5) - route = param['value'] - log.debug("Service needs to get published host %s:%s using route %s", - config.PUBLISHED_HOST_NAME, port, route) + route = param["value"] + log.debug( + "Service needs to get published host %s:%s using route %s", + config.PUBLISHED_HOST_NAME, + port, + route, + ) service_url = "http://" + service_name + "/" + route - query_string = {"hostname": str( - config.PUBLISHED_HOST_NAME), "port": str(port)} - log.debug("creating request %s and query %s", - service_url, query_string) + query_string = { + "hostname": str(config.PUBLISHED_HOST_NAME), + "port": str(port), + } + log.debug("creating request %s and query %s", service_url, query_string) async with session.post(service_url, data=query_string) as response: log.debug("query response: %s", await response.text()) return @@ -290,37 +346,50 @@ async def _pass_port_to_service(service_name: str, async def _create_network_name(service_name: str, node_uuid: str) -> str: - return service_name + '_' + node_uuid + return service_name + "_" + node_uuid -async def _create_overlay_network_in_swarm(client: aiodocker.docker.Docker, - service_name: str, - node_uuid: str) -> str: - log.debug("Creating overlay network for service %s with uuid %s", service_name, node_uuid) +async def _create_overlay_network_in_swarm( + client: aiodocker.docker.Docker, service_name: str, node_uuid: str +) -> str: + log.debug( + "Creating overlay network for service %s with uuid %s", service_name, node_uuid + ) network_name = await _create_network_name(service_name, node_uuid) try: network_config = { "Name": network_name, "Driver": "overlay", - "Labels": { - "uuid": node_uuid - } + "Labels": {"uuid": node_uuid}, } docker_network = await client.networks.create(network_config) - log.debug("Network %s created for service %s with uuid %s", network_name, service_name, node_uuid) + log.debug( + "Network %s created for service %s with uuid %s", + network_name, + service_name, + node_uuid, + ) return docker_network.id except aiodocker.exceptions.DockerError as err: - log.exception( - "Error while creating network for service %s", service_name) + log.exception("Error while creating network for service %s", service_name) raise exceptions.GenericDockerError( - "Error while creating network", err) from err + "Error while creating network", err + ) from err -async def _remove_overlay_network_of_swarm(client: aiodocker.docker.Docker, node_uuid: str) -> None: +async def _remove_overlay_network_of_swarm( + client: aiodocker.docker.Docker, node_uuid: str +) -> None: log.debug("Removing overlay network for service with uuid %s", node_uuid) try: networks = await client.networks.list() - networks = [x for x in (await client.networks.list()) if x["Labels"] and "uuid" in x["Labels"] and x["Labels"]["uuid"] == node_uuid] + networks = [ + x + for x in (await client.networks.list()) + if x["Labels"] + and "uuid" in x["Labels"] + and x["Labels"]["uuid"] == node_uuid + ] log.debug("Found %s networks with uuid %s", len(networks), node_uuid) # remove any network in the list (should be only one) for network in networks: @@ -329,13 +398,16 @@ async def _remove_overlay_network_of_swarm(client: aiodocker.docker.Docker, node log.debug("Removed %s networks with uuid %s", len(networks), node_uuid) except aiodocker.exceptions.DockerError as err: log.exception( - "Error while removing networks for service with uuid: %s", node_uuid) + "Error while removing networks for service with uuid: %s", node_uuid + ) raise exceptions.GenericDockerError( - "Error while removing networks", err) from err + "Error while removing networks", err + ) from err - -async def _get_service_state(client: aiodocker.docker.Docker, service: Dict) -> Tuple[ServiceState, str]: +async def _get_service_state( + client: aiodocker.docker.Docker, service: Dict +) -> Tuple[ServiceState, str]: # some times one has to wait until the task info is filled service_name = service["Spec"]["Name"] log.debug("Getting service %s state", service_name) @@ -349,9 +421,13 @@ async def _get_service_state(client: aiodocker.docker.Docker, service: Dict) -> task_state = last_task["Status"]["State"] log.debug("%s %s", service["ID"], task_state) simcore_state = ServiceState.STARTING - simcore_message = last_task["Status"]["Err"] if "Err" in last_task["Status"] else "" + simcore_message = ( + last_task["Status"]["Err"] if "Err" in last_task["Status"] else "" + ) if task_state in ("failed", "rejected"): - log.error("service %s failed with %s", service_name, last_task["Status"]) + log.error( + "service %s failed with %s", service_name, last_task["Status"] + ) simcore_state = ServiceState.FAILED elif task_state in ("pending"): simcore_state = ServiceState.PENDING @@ -368,7 +444,10 @@ async def _get_service_state(client: aiodocker.docker.Docker, service: Dict) -> await asyncio.sleep(1) # 1s log.debug("Waited for service %s to start", service_name) -async def _wait_until_service_running_or_failed(client: aiodocker.docker.Docker, service: Dict, node_uuid: str) -> None: + +async def _wait_until_service_running_or_failed( + client: aiodocker.docker.Docker, service: Dict, node_uuid: str +) -> None: # some times one has to wait until the task info is filled service_name = service["Spec"]["Name"] log.debug("Waiting for service %s to start", service_name) @@ -382,7 +461,9 @@ async def _wait_until_service_running_or_failed(client: aiodocker.docker.Docker, task_state = last_task["Status"]["State"] log.debug("%s %s", service["ID"], task_state) if task_state in ("failed", "rejected"): - log.error("Error while waiting for service with %s", last_task["Status"]) + log.error( + "Error while waiting for service with %s", last_task["Status"] + ) raise exceptions.ServiceStartTimeoutError(service_name, node_uuid) if task_state in ("running", "complete"): break @@ -391,7 +472,9 @@ async def _wait_until_service_running_or_failed(client: aiodocker.docker.Docker, log.debug("Waited for service %s to start", service_name) -async def _get_repos_from_key(app: web.Application, service_key: str) -> Dict[str, List[Dict]]: +async def _get_repos_from_key( + app: web.Application, service_key: str +) -> Dict[str, List[Dict]]: # get the available image for the main service (syntax is image:tag) list_of_images = { service_key: await registry_proxy.list_image_tags(app, service_key) @@ -400,26 +483,36 @@ async def _get_repos_from_key(app: web.Application, service_key: str) -> Dict[st if not list_of_images[service_key]: raise exceptions.ServiceNotAvailableError(service_key) - log.debug("Service %s has the following list of images available: %s", - service_key, list_of_images) + log.debug( + "Service %s has the following list of images available: %s", + service_key, + list_of_images, + ) return list_of_images -async def _get_dependant_repos(app: web.Application, service_key: str, service_tag: str) -> List[Dict]: +async def _get_dependant_repos( + app: web.Application, service_key: str, service_tag: str +) -> List[Dict]: list_of_images = await _get_repos_from_key(app, service_key) tag = await _find_service_tag(list_of_images, service_key, service_tag) # look for dependencies - dependent_repositories = await registry_proxy.list_interactive_service_dependencies(app, service_key, tag) + dependent_repositories = await registry_proxy.list_interactive_service_dependencies( + app, service_key, tag + ) return dependent_repositories -async def _find_service_tag(list_of_images: Dict, service_key: str, service_tag: str) -> str: +async def _find_service_tag( + list_of_images: Dict, service_key: str, service_tag: str +) -> str: if not service_key in list_of_images: raise exceptions.ServiceNotAvailableError( - service_name=service_key, service_tag=service_tag) + service_name=service_key, service_tag=service_tag + ) # filter incorrect chars - regex = re.compile(r'^\d+\.\d+\.\d+$') + regex = re.compile(r"^\d+\.\d+\.\d+$") filtered_tags_list = filter(regex.search, list_of_images[service_key]) # sort them now available_tags_list = sorted(filtered_tags_list, key=StrictVersion) @@ -427,36 +520,56 @@ async def _find_service_tag(list_of_images: Dict, service_key: str, service_tag: if not available_tags_list: raise exceptions.ServiceNotAvailableError(service_key, service_tag) tag = service_tag - if not service_tag or service_tag == 'latest': + if not service_tag or service_tag == "latest": # get latest tag - tag = available_tags_list[len(available_tags_list)-1] + tag = available_tags_list[len(available_tags_list) - 1] elif available_tags_list.count(service_tag) != 1: raise exceptions.ServiceNotAvailableError( - service_name=service_key, service_tag=service_tag) + service_name=service_key, service_tag=service_tag + ) log.debug("Service tag found is %s ", service_tag) return tag -async def _start_docker_service(app: web.Application, - client: aiodocker.docker.Docker, - user_id: str, - project_id: str, - service_key: str, - service_tag: str, - main_service: bool, - node_uuid: str, - node_base_path: str, - internal_network_id: Optional[str] - ) -> Dict: # pylint: disable=R0913 - service_parameters = await _create_docker_service_params(app, client, service_key, service_tag, main_service, - user_id, node_uuid, project_id, node_base_path, internal_network_id) - log.debug("Starting docker service %s:%s using parameters %s", service_key, service_tag, service_parameters) + +async def _start_docker_service( + app: web.Application, + client: aiodocker.docker.Docker, + user_id: str, + project_id: str, + service_key: str, + service_tag: str, + main_service: bool, + node_uuid: str, + node_base_path: str, + internal_network_id: Optional[str], +) -> Dict: # pylint: disable=R0913 + service_parameters = await _create_docker_service_params( + app, + client, + service_key, + service_tag, + main_service, + user_id, + node_uuid, + project_id, + node_base_path, + internal_network_id, + ) + log.debug( + "Starting docker service %s:%s using parameters %s", + service_key, + service_tag, + service_parameters, + ) # lets start the service try: service = await client.services.create(**service_parameters) if "ID" not in service: # error while starting service - raise exceptions.DirectorException("Error while starting service: {}".format(str(service))) + raise exceptions.DirectorException( + "Error while starting service: {}".format(str(service)) + ) log.debug("Service started now waiting for it to run") # get the full info from docker @@ -470,11 +583,15 @@ async def _start_docker_service(app: web.Application, service = await client.services.inspect(service["ID"]) published_port, target_port = await _get_docker_image_port_mapping(service) # now pass boot parameters - service_boot_parameters_labels = await _get_service_boot_parameters_labels(app, service_key, service_tag) + service_boot_parameters_labels = await _get_service_boot_parameters_labels( + app, service_key, service_tag + ) service_entrypoint = _get_service_entrypoint(service_boot_parameters_labels) if published_port: session = app[APP_CLIENT_SESSION_KEY] - await _pass_port_to_service(service_name, published_port, service_boot_parameters_labels, session) + await _pass_port_to_service( + service_name, published_port, service_boot_parameters_labels, session + ) container_meta_data = { "published_port": published_port, @@ -486,7 +603,7 @@ async def _start_docker_service(app: web.Application, "service_port": target_port, "service_basepath": node_base_path, "service_state": service_state.value, - "service_message": service_msg + "service_message": service_msg, } return container_meta_data @@ -497,8 +614,7 @@ async def _start_docker_service(app: web.Application, except aiodocker.exceptions.DockerError as err: log.exception("Unexpected error") await _silent_service_cleanup(app, node_uuid) - raise exceptions.ServiceNotAvailableError( - service_key, service_tag) from err + raise exceptions.ServiceNotAvailableError(service_key, service_tag) from err async def _silent_service_cleanup(app: web.Application, node_uuid: str) -> None: @@ -508,51 +624,63 @@ async def _silent_service_cleanup(app: web.Application, node_uuid: str) -> None: pass -async def _create_node(app: web.Application, - client: aiodocker.docker.Docker, - user_id: str, - project_id: str, - list_of_services: List[Dict], - node_uuid: str, - node_base_path: str - ) -> List[Dict]: # pylint: disable=R0913, R0915 - log.debug("Creating %s docker services for node %s and base path %s for user %s", - len(list_of_services), node_uuid, node_base_path, user_id) +async def _create_node( + app: web.Application, + client: aiodocker.docker.Docker, + user_id: str, + project_id: str, + list_of_services: List[Dict], + node_uuid: str, + node_base_path: str, +) -> List[Dict]: # pylint: disable=R0913, R0915 + log.debug( + "Creating %s docker services for node %s and base path %s for user %s", + len(list_of_services), + node_uuid, + node_base_path, + user_id, + ) log.debug("Services %s will be started", list_of_services) # if the service uses several docker images, a network needs to be setup to connect them together inter_docker_network_id = None if len(list_of_services) > 1: service_name = registry_proxy.get_service_first_name(list_of_services[0]["key"]) - inter_docker_network_id = await _create_overlay_network_in_swarm(client, service_name, node_uuid) + inter_docker_network_id = await _create_overlay_network_in_swarm( + client, service_name, node_uuid + ) log.debug("Created docker network in swarm for service %s", service_name) containers_meta_data = list() for service in list_of_services: - service_meta_data = await _start_docker_service(app, - client, user_id, - project_id, - service["key"], - service["tag"], - list_of_services.index( - service) == 0, - node_uuid, - node_base_path, - inter_docker_network_id) + service_meta_data = await _start_docker_service( + app, + client, + user_id, + project_id, + service["key"], + service["tag"], + list_of_services.index(service) == 0, + node_uuid, + node_base_path, + inter_docker_network_id, + ) containers_meta_data.append(service_meta_data) return containers_meta_data -async def _get_service_key_version_from_docker_service(service: Dict) -> Tuple[str, str]: +async def _get_service_key_version_from_docker_service( + service: Dict, +) -> Tuple[str, str]: # docker_image = config.REGISTRY_URL + '/' + service_key + ':' + service_tag - service_full_name = str( - service["Spec"]["TaskTemplate"]["ContainerSpec"]["Image"]) + service_full_name = str(service["Spec"]["TaskTemplate"]["ContainerSpec"]["Image"]) if not service_full_name.startswith(config.REGISTRY_URL): raise exceptions.DirectorException( - msg="Invalid service {}".format(service_full_name)) + msg="Invalid service {}".format(service_full_name) + ) - service_full_name = service_full_name[len(config.REGISTRY_URL):].strip("/") + service_full_name = service_full_name[len(config.REGISTRY_URL) :].strip("/") return service_full_name.split(":")[0], service_full_name.split(":")[1] @@ -561,12 +689,26 @@ async def _get_service_basepath_from_docker_service(service: Dict) -> str: envs_dict = dict(x.split("=") for x in envs_list) return envs_dict["SIMCORE_NODE_BASEPATH"] -async def start_service(app: web.Application, user_id: str, project_id: str, service_key: str, service_tag: str, node_uuid: str, node_base_path: str) -> Dict: + +async def start_service( + app: web.Application, + user_id: str, + project_id: str, + service_key: str, + service_tag: str, + node_uuid: str, + node_base_path: str, +) -> Dict: # pylint: disable=C0103 - log.debug("starting service %s:%s using uuid %s, basepath %s", - service_key, service_tag, node_uuid, node_base_path) + log.debug( + "starting service %s:%s using uuid %s, basepath %s", + service_key, + service_tag, + node_uuid, + node_base_path, + ) # first check the uuid is available - async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager + async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager await _check_node_uuid_available(client, node_uuid) list_of_images = await _get_repos_from_key(app, service_key) service_tag = await _find_service_tag(list_of_images, service_key, service_tag) @@ -578,30 +720,41 @@ async def start_service(app: web.Application, user_id: str, project_id: str, ser if list_of_dependencies: list_of_services_to_start.extend(list_of_dependencies) - containers_meta_data = await _create_node(app, client, user_id, project_id, - list_of_services_to_start, - node_uuid, node_base_path) + containers_meta_data = await _create_node( + app, + client, + user_id, + project_id, + list_of_services_to_start, + node_uuid, + node_base_path, + ) node_details = containers_meta_data[0] # we return only the info of the main service return node_details -async def _get_node_details(app: web.Application, client: aiodocker.docker.Docker, service: Dict) -> Dict: - service_key, service_tag = await _get_service_key_version_from_docker_service(service) +async def _get_node_details( + app: web.Application, client: aiodocker.docker.Docker, service: Dict +) -> Dict: + service_key, service_tag = await _get_service_key_version_from_docker_service( + service + ) # get boot parameters - results = await asyncio.gather(_get_service_boot_parameters_labels(app, service_key, service_tag), - _get_service_basepath_from_docker_service(service), - _get_service_state(client, service)) + results = await asyncio.gather( + _get_service_boot_parameters_labels(app, service_key, service_tag), + _get_service_basepath_from_docker_service(service), + _get_service_state(client, service), + ) service_boot_parameters_labels = results[0] service_entrypoint = _get_service_entrypoint(service_boot_parameters_labels) service_basepath = results[1] service_state, service_msg = results[2] - service_name = service["Spec"]["Name"] + service_name = service["Spec"]["Name"] service_uuid = service["Spec"]["Labels"]["uuid"] - # get the published port published_port, target_port = await _get_docker_image_port_mapping(service) node_details = { @@ -614,60 +767,80 @@ async def _get_node_details(app: web.Application, client: aiodocker.docker.Docke "service_port": target_port, "service_basepath": service_basepath, "service_state": service_state.value, - "service_message": service_msg + "service_message": service_msg, } return node_details -async def get_services_details(app: web.Application, user_id: Optional[str], study_id: Optional[str]) -> List[Dict]: + +async def get_services_details( + app: web.Application, user_id: Optional[str], study_id: Optional[str] +) -> List[Dict]: async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager try: filters = ["type=main"] if user_id: - filters.append('user_id=' + user_id) + filters.append("user_id=" + user_id) if study_id: - filters.append('study_id=' + study_id) - list_running_services = await client.services.list(filters={'label': filters}) - services_details = [await _get_node_details(app, client, service) for service in list_running_services] + filters.append("study_id=" + study_id) + list_running_services = await client.services.list( + filters={"label": filters} + ) + services_details = [ + await _get_node_details(app, client, service) + for service in list_running_services + ] return services_details except aiodocker.exceptions.DockerError as err: log.exception( - "Error while listing services with user_id, study_id %s, %s", user_id, study_id) + "Error while listing services with user_id, study_id %s, %s", + user_id, + study_id, + ) raise exceptions.GenericDockerError( - "Error while accessing container", err) from err + "Error while accessing container", err + ) from err async def get_service_details(app: web.Application, node_uuid: str) -> Dict: async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager try: list_running_services_with_uuid = await client.services.list( - filters={'label': ['uuid=' + node_uuid, "type=main"]}) + filters={"label": ["uuid=" + node_uuid, "type=main"]} + ) # error if no service with such an id exists if not list_running_services_with_uuid: raise exceptions.ServiceUUIDNotFoundError(node_uuid) if len(list_running_services_with_uuid) > 1: # someone did something fishy here - raise exceptions.DirectorException(msg="More than one docker service is labeled as main service") + raise exceptions.DirectorException( + msg="More than one docker service is labeled as main service" + ) - node_details = await _get_node_details(app, client, list_running_services_with_uuid[0]) + node_details = await _get_node_details( + app, client, list_running_services_with_uuid[0] + ) return node_details except aiodocker.exceptions.DockerError as err: - log.exception( - "Error while accessing container with uuid: %s", node_uuid) + log.exception("Error while accessing container with uuid: %s", node_uuid) raise exceptions.GenericDockerError( - "Error while accessing container", err) from err + "Error while accessing container", err + ) from err async def stop_service(app: web.Application, node_uuid: str) -> None: log.debug("stopping service with uuid %s", node_uuid) # get the docker client - async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager + async with docker_utils.docker_client() as client: # pylint: disable=not-async-context-manager try: list_running_services_with_uuid = await client.services.list( - filters={'label': 'uuid=' + node_uuid}) + filters={"label": "uuid=" + node_uuid} + ) except aiodocker.exceptions.DockerError as err: log.exception("Error while stopping container with uuid: %s", node_uuid) - raise exceptions.GenericDockerError("Error while stopping container", err) from err + raise exceptions.GenericDockerError( + "Error while stopping container", err + ) from err # error if no service with such an id exists if not list_running_services_with_uuid: @@ -676,22 +849,33 @@ async def stop_service(app: web.Application, node_uuid: str) -> None: # save the state of the main service if it can service_details = await get_service_details(app, node_uuid) # FIXME: the exception for the 3d-viewer shall be removed once the dy-sidecar comes in - service_host_name = "{}:{}{}".format(service_details["service_host"], - service_details["service_port"] if service_details["service_port"] else "80", - service_details["service_basepath"] if not "3d-viewer" in service_details["service_host"] else "") + service_host_name = "{}:{}{}".format( + service_details["service_host"], + service_details["service_port"] + if service_details["service_port"] + else "80", + service_details["service_basepath"] + if not "3d-viewer" in service_details["service_host"] + else "", + ) log.debug("saving state of service %s...", service_host_name) try: session = app[APP_CLIENT_SESSION_KEY] service_url = "http://" + service_host_name + "/" + "state" async with session.post(service_url) as response: if 199 < response.status < 300: - log.debug("service %s successfully saved its state", service_host_name) + log.debug( + "service %s successfully saved its state", service_host_name + ) else: - log.warning("service %s does not allow saving state, answered %s", service_host_name, await response.text()) + log.warning( + "service %s does not allow saving state, answered %s", + service_host_name, + await response.text(), + ) except ClientConnectionError: log.exception("service %s could not be contacted, state not saved") - # remove the services try: log.debug("removing services...") From fc1638e7c5a6b1eea0838ecbe1ca2f00430728b5 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:50:15 +0200 Subject: [PATCH 25/68] add director remote debugging entry --- .vscode-template/launch.json | 41 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/.vscode-template/launch.json b/.vscode-template/launch.json index 4067e02a93d..14186ec35ec 100644 --- a/.vscode-template/launch.json +++ b/.vscode-template/launch.json @@ -4,6 +4,19 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Remote Attach director", + "type": "python", + "request": "attach", + "port": 3004, + "host": "127.0.0.1", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "/devel" + } + ] + }, { "name": "Python: Remote Attach webserver", "type": "python", @@ -17,19 +30,19 @@ } ] }, - { - "name": "Python: Remote Attach sidecar", - "type": "python", - "request": "attach", - "port": 3002, - "host": "127.0.0.1", - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "/devel" - } - ] - }, + { + "name": "Python: Remote Attach sidecar", + "type": "python", + "request": "attach", + "port": 3002, + "host": "127.0.0.1", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "/devel" + } + ] + }, { "name": "Python: Remote Attach storage", "type": "python", @@ -57,4 +70,4 @@ ] } ] -} +} \ No newline at end of file From f99273d45edaa4a49c946bb18f66818084b49fd9 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:50:36 +0200 Subject: [PATCH 26/68] set default logs at warning level --- services/docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 454d88599b9..9eb989a13fe 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -225,7 +225,7 @@ services: command: - "--api=true" - "--api.dashboard=true" - - "--log.level=INFO" + - "--log.level=WARNING" - "--accesslog=false" - "--metrics.prometheus=true" - "--metrics.prometheus.addEntryPointsLabels=true" @@ -236,6 +236,7 @@ services: - "--entryPoints.traefik_dashboard.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" - "--providers.docker.swarmMode=true" + #FIXME: the zone must be prefixed with the stack name!!! - "--providers.docker.network=simcore_default" - "--providers.docker.exposedByDefault=false" #FIXME: the zone must be prefixed with the stack name!!! From b9b514a828c836cae87606351f7e399ac8129f9e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 17:50:49 +0200 Subject: [PATCH 27/68] traefik dashboard in devel mode --- services/docker-compose.devel.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/services/docker-compose.devel.yml b/services/docker-compose.devel.yml index 3fc1118a882..b1e8af2375e 100644 --- a/services/docker-compose.devel.yml +++ b/services/docker-compose.devel.yml @@ -53,3 +53,16 @@ services: - ../packages:/devel/packages environment: - SC_BOOT_MODE=debug-ptvsd + + traefik: + deploy: + # currently only available in devel mode as this conflicts with the external traefik dashboard + labels: + - io.simcore.zone=internal_simcore_stack + - traefik.enable=true + # traefik UI + - traefik.http.routers.api_internal.service=api@internal + - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + - traefik.http.routers.api_internal.entrypoints=traefik_dashboard + - traefik.http.routers.api_internal.middlewares=gzip@docker + - traefik.http.services.api_internal.loadbalancer.server.port=8080 From 82d5055f27a5e8cdae8bcb120b26601c52e31b53 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 7 Apr 2020 18:06:16 +0200 Subject: [PATCH 28/68] improve user feedback on service state --- .../class/osparc/component/service/NodeStatus.js | 8 ++++++++ .../web/client/source/class/osparc/data/model/Node.js | 10 ++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/services/web/client/source/class/osparc/component/service/NodeStatus.js b/services/web/client/source/class/osparc/component/service/NodeStatus.js index ad6a584772d..8b67fac95fb 100644 --- a/services/web/client/source/class/osparc/component/service/NodeStatus.js +++ b/services/web/client/source/class/osparc/component/service/NodeStatus.js @@ -63,6 +63,10 @@ qx.Class.define("osparc.component.service.NodeStatus", { return this.tr("Starting..."); } else if (status === "pending") { return this.tr("Pending..."); + } else if (status === "pulling") { + return this.tr("Pulling..."); + } else if (status === "connecting") { + return this.tr("Connecting..."); } return this.tr("Idle"); } @@ -78,6 +82,10 @@ qx.Class.define("osparc.component.service.NodeStatus", { return "@FontAwesome5Solid/circle-notch/12"; } else if (status === "pending") { return "@FontAwesome5Solid/circle-notch/12"; + } else if (status === "pulling") { + return "@FontAwesome5Solid/circle-notch/12"; + } else if (status === "connecting") { + return "@FontAwesome5Solid/circle-notch/12"; } return "@FontAwesome5Solid/check/12"; }, diff --git a/services/web/client/source/class/osparc/data/model/Node.js b/services/web/client/source/class/osparc/data/model/Node.js index ea8ca97cfc4..48ff1991335 100644 --- a/services/web/client/source/class/osparc/data/model/Node.js +++ b/services/web/client/source/class/osparc/data/model/Node.js @@ -931,13 +931,18 @@ qx.Class.define("osparc.data.model.Node", { __onNodeState: function(data) { const serviceState = data["service_state"]; switch (serviceState) { - case "starting": - case "pulling": { + case "starting": { this.setInteractiveStatus("starting"); const interval = 5000; qx.event.Timer.once(() => this.__nodeState(), this, interval); break; } + case "pulling": { + this.setInteractiveStatus("pulling"); + const interval = 5000; + qx.event.Timer.once(() => this.__nodeState(), this, interval); + break; + } case "pending": { this.setInteractiveStatus("pending"); const interval = 10000; @@ -1028,6 +1033,7 @@ qx.Class.define("osparc.data.model.Node", { }, this); pingRequest.addListenerOnce("fail", e => { const error = e.getTarget().getResponse(); + this.setInteractiveStatus("connecting"); console.log("service not ready yet, waiting... " + error); const interval = 1000; qx.event.Timer.once(() => this.__waitForServiceReady(srvUrl), this, interval); From 0026f337b0a622f5feb1d24139ccfdb9f519542a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 9 Apr 2020 11:40:01 +0200 Subject: [PATCH 29/68] check if reverse-proxy settings are set --- .../src/simcore_service_director/producer.py | 63 +++++++++---------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 8597c919ac2..3ac532b676e 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -17,6 +17,7 @@ from .system_utils import get_system_extra_hosts_raw SERVICE_RUNTIME_SETTINGS: str = "simcore.service.settings" +SERVICE_REVERSE_PROXY_SETTINGS: str = "simcore.service.reverse-proxy-settings" SERVICE_RUNTIME_BOOTSETTINGS: str = "simcore.service.bootsettings" log = logging.getLogger(__name__) @@ -59,30 +60,15 @@ def _check_setting_correctness(setting: Dict) -> None: raise exceptions.DirectorException("Invalid setting in %s" % setting) -async def _read_service_settings(app: web.Application, key: str, tag: str) -> Dict: - # pylint: disable=C0103 - image_labels = await registry_proxy.get_image_labels(app, key, tag) - runtime_parameters = ( - json.loads(image_labels[SERVICE_RUNTIME_SETTINGS]) - if SERVICE_RUNTIME_SETTINGS in image_labels - else {} - ) - log.debug("Retrieved service runtime settings: %s", runtime_parameters) - return runtime_parameters - - -async def _get_service_boot_parameters_labels( - app: web.Application, key: str, tag: str +async def _read_service_settings( + app: web.Application, key: str, tag: str, settings_name: str ) -> Dict: - # pylint: disable=C0103 image_labels = await registry_proxy.get_image_labels(app, key, tag) - boot_params = ( - json.loads(image_labels[SERVICE_RUNTIME_BOOTSETTINGS]) - if SERVICE_RUNTIME_BOOTSETTINGS in image_labels - else {} + settings = ( + json.loads(image_labels[settings_name]) if settings_name in image_labels else {} ) - log.debug("Retrieved service boot settings: %s", boot_params) - return boot_params + log.debug("Retrieved %s settings: %s", settings_name, settings) + return settings # pylint: disable=too-many-branches @@ -100,7 +86,10 @@ async def _create_docker_service_params( ) -> Dict: service_parameters_labels = await _read_service_settings( - app, service_key, service_tag + app, service_key, service_tag, SERVICE_RUNTIME_SETTINGS + ) + reverse_proxy_settings = await _read_service_settings( + app, service_key, service_tag, SERVICE_REVERSE_PROXY_SETTINGS ) service_name = registry_proxy.get_service_last_names(service_key) + "_" + node_uuid log.debug("Converting labels to docker runtime parameters") @@ -159,16 +148,18 @@ async def _create_docker_service_params( }, "networks": [internal_network_id] if internal_network_id else [], } - if "3d-viewer" in service_name: - # FIXME: the exception for the 3d-viewer shall be removed once the dy-sidecar comes in - # Paraview visualizer needs a strip prefix here, this should be removed once dy-sidecar is in or that - # all dynamic services are converted to using traefik as reverse proxy instead of webserver - docker_params["labels"][ - f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex" - ] = f"^/x/{node_uuid}" - docker_params["labels"][ - f"traefik.http.routers.{service_name}.middlewares" - ] += f", {service_name}_stripprefixregex" + if reverse_proxy_settings: + # some services define strip_path:true if they need the path to be stripped away + if ( + "strip_path" in reverse_proxy_settings + and reverse_proxy_settings["strip_path"] + ): + docker_params["labels"][ + f"traefik.http.middlewares.{service_name}_stripprefixregex.stripprefixregex.regex" + ] = f"^/x/{node_uuid}" + docker_params["labels"][ + f"traefik.http.routers.{service_name}.middlewares" + ] += f", {service_name}_stripprefixregex" for param in service_parameters_labels: _check_setting_correctness(param) @@ -583,8 +574,8 @@ async def _start_docker_service( service = await client.services.inspect(service["ID"]) published_port, target_port = await _get_docker_image_port_mapping(service) # now pass boot parameters - service_boot_parameters_labels = await _get_service_boot_parameters_labels( - app, service_key, service_tag + service_boot_parameters_labels = await _read_service_settings( + app, service_key, service_tag, SERVICE_RUNTIME_BOOTSETTINGS ) service_entrypoint = _get_service_entrypoint(service_boot_parameters_labels) if published_port: @@ -743,7 +734,9 @@ async def _get_node_details( # get boot parameters results = await asyncio.gather( - _get_service_boot_parameters_labels(app, service_key, service_tag), + _read_service_settings( + app, service_key, service_tag, SERVICE_RUNTIME_BOOTSETTINGS + ), _get_service_basepath_from_docker_service(service), _get_service_state(client, service), ) From d701f3fb91fcdf1965f1f64c8482240713f3e6e6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 10:35:42 +0200 Subject: [PATCH 30/68] add dockerfile linter --- .vscode-template/settings.json | 3 ++- scripts/hadolint.bash | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100755 scripts/hadolint.bash diff --git a/.vscode-template/settings.json b/.vscode-template/settings.json index 719d1639a04..549eab70de6 100644 --- a/.vscode-template/settings.json +++ b/.vscode-template/settings.json @@ -29,8 +29,9 @@ }, "python.testing.pyTestEnabled": true, "autoDocstring.docstringFormat": "sphinx", + "hadolint.hadolintPath": "${workspaceFolder}/scripts/hadolint.bash", "shellcheck.executablePath": "${workspaceFolder}/scripts/shellcheck.bash", "shellcheck.run": "onSave", "shellcheck.enableQuickFix": true, "python.formatting.provider": "black" -} +} \ No newline at end of file diff --git a/scripts/hadolint.bash b/scripts/hadolint.bash new file mode 100755 index 00000000000..2d8ef93b938 --- /dev/null +++ b/scripts/hadolint.bash @@ -0,0 +1,4 @@ +#!/bin/bash +# dockerfile linter tool +# - https://github.com/hadolint/hadolint +exec docker run --rm -i hadolint/hadolint < "$@" \ No newline at end of file From 2f82a589efab832322c4e67bf9e531b3eb5b8b25 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 20:47:27 +0200 Subject: [PATCH 31/68] env variables not taken from env --- services/docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 9eb989a13fe..4ccacd101d2 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -59,10 +59,10 @@ services: init: true environment: - BUILD_DATE=${BUILD_DATE:-1970-01-01T01:00:00Z} - - DIRECTOR_HOST=director - - DIRECTOR_PORT=8080 - - STORAGE_HOST=storage - - STORAGE_PORT=8080 + - DIRECTOR_HOST=${DIRECTOR_HOST:-director} + - DIRECTOR_PORT=${DIRECTOR_PORT:-8080} + - STORAGE_HOST=${STORAGE_HOST:-storage} + - STORAGE_PORT=${STORAGE_PORT:-8080} - CATALOG_HOST=${CATALOG_HOST:-catalog} - SWARM_STACK_NAME=${SWARM_STACK_NAME:-simcore} - WEBSERVER_LOGLEVEL=${LOG_LEVEL:-WARNING} From 123160cc0ab2c6627d83649f59b5e2f817ead707 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:14:52 +0200 Subject: [PATCH 32/68] added TRAEFIK_SIMCORE_ZONE env variable --- .../src/simcore_service_director/config.py | 56 +++++++++++++------ .../src/simcore_service_director/producer.py | 3 +- services/docker-compose.yml | 1 + 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/services/director/src/simcore_service_director/config.py b/services/director/src/simcore_service_director/config.py index eea9a933804..b189d781117 100644 --- a/services/director/src/simcore_service_director/config.py +++ b/services/director/src/simcore_service_director/config.py @@ -11,14 +11,23 @@ logging.basicConfig( level=logging.DEBUG if DEBUG_MODE else logging.INFO, - format='%(levelname)s:%(name)s-%(lineno)d: %(message)s' - ) + format="%(levelname)s:%(name)s-%(lineno)d: %(message)s", +) API_VERSION: str = "v0" API_ROOT: str = "api" -DIRECTOR_REGISTRY_CACHING: bool = os.environ.get("DIRECTOR_REGISTRY_CACHING", True) in ["true", "True", True] -DIRECTOR_REGISTRY_CACHING_TTL: int = int(os.environ.get("DIRECTOR_REGISTRY_CACHING_TTL", 15*60)) +DIRECTOR_REGISTRY_CACHING: bool = os.environ.get("DIRECTOR_REGISTRY_CACHING", True) in [ + "true", + "True", + True, +] +DIRECTOR_REGISTRY_CACHING_TTL: int = int( + os.environ.get("DIRECTOR_REGISTRY_CACHING_TTL", 15 * 60) +) +TRAEFIK_SIMCORE_ZONE: str = os.environ.get( + "TRAEFIK_INTERNAL_ZONE", "internal_simcore_stack" +) APP_REGISTRY_CACHE_DATA_KEY: str = __name__ + "_registry_cache_data" REGISTRY_AUTH: bool = os.environ.get("REGISTRY_AUTH", False) in ["true", "True", True] @@ -31,28 +40,43 @@ # these are the envs passed to the dynamic services by default SERVICES_DEFAULT_ENVS: Dict[str, str] = { - "POSTGRES_ENDPOINT": os.environ.get("POSTGRES_ENDPOINT", "undefined postgres endpoint"), + "POSTGRES_ENDPOINT": os.environ.get( + "POSTGRES_ENDPOINT", "undefined postgres endpoint" + ), "POSTGRES_USER": os.environ.get("POSTGRES_USER", "undefined postgres user"), - "POSTGRES_PASSWORD": os.environ.get("POSTGRES_PASSWORD", "undefined postgres password"), + "POSTGRES_PASSWORD": os.environ.get( + "POSTGRES_PASSWORD", "undefined postgres password" + ), "POSTGRES_DB": os.environ.get("POSTGRES_DB", "undefined postgres db"), - "STORAGE_ENDPOINT": os.environ.get("STORAGE_ENDPOINT", "undefined storage endpoint") + "STORAGE_ENDPOINT": os.environ.get( + "STORAGE_ENDPOINT", "undefined storage endpoint" + ), } # some services need to know the published host to be functional (paraview) # TODO: please review if needed PUBLISHED_HOST_NAME: str = os.environ.get("PUBLISHED_HOST_NAME", "") # used when in devel mode vs release mode -NODE_SCHEMA_LOCATION: str = os.environ.get("NODE_SCHEMA_LOCATION", - f"{API_ROOT}/{API_VERSION}/schemas/node-meta-v0.0.1.json") +NODE_SCHEMA_LOCATION: str = os.environ.get( + "NODE_SCHEMA_LOCATION", f"{API_ROOT}/{API_VERSION}/schemas/node-meta-v0.0.1.json" +) # used to find the right network name -SIMCORE_SERVICES_NETWORK_NAME: Optional[str] = os.environ.get("SIMCORE_SERVICES_NETWORK_NAME") +SIMCORE_SERVICES_NETWORK_NAME: Optional[str] = os.environ.get( + "SIMCORE_SERVICES_NETWORK_NAME" +) # useful when developing with an alternative registry namespace -SIMCORE_SERVICES_PREFIX: str = os.environ.get("SIMCORE_SERVICES_PREFIX", "simcore/services") +SIMCORE_SERVICES_PREFIX: str = os.environ.get( + "SIMCORE_SERVICES_PREFIX", "simcore/services" +) # tracing -TRACING_ENABLED: bool = os.environ.get("TRACING_ENABLED", True) in ["true", "True", True] -TRACING_ZIPKIN_ENDPOINT: str = os.environ.get("TRACING_ZIPKIN_ENDPOINT", "http://jaeger:9411") - -__all__ = [ - 'APP_CLIENT_SESSION_KEY' +TRACING_ENABLED: bool = os.environ.get("TRACING_ENABLED", True) in [ + "true", + "True", + True, ] +TRACING_ZIPKIN_ENDPOINT: str = os.environ.get( + "TRACING_ZIPKIN_ENDPOINT", "http://jaeger:9411" +) + +__all__ = ["APP_CLIENT_SESSION_KEY"] diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 3ac532b676e..bae92fae638 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -138,8 +138,7 @@ async def _create_docker_service_params( "study_id": project_id, "user_id": user_id, "type": "main" if main_service else "dependency", - # FIXME: the zone must be prefixed with the stack name!!! - "io.simcore.zone": "internal_simcore_stack", + "io.simcore.zone": f"{config.TRAEFIK_SIMCORE_ZONE}", "traefik.enable": "true", f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 4ccacd101d2..0ef2dc4d81e 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -42,6 +42,7 @@ services: - SIMCORE_SERVICES_NETWORK_NAME=interactive_services_subnet - TRACING_ENABLED=${TRACING_ENABLED:-True} - TRACING_ZIPKIN_ENDPOINT=${TRACING_ZIPKIN_ENDPOINT:-http://jaeger:9411} + - TRAEFIK_SIMCORE_ZONE=${TRAEFIK_SIMCORE_ZONE:-internal_simcore_stack} - LOGLEVEL=${LOG_LEVEL:-WARNING} volumes: - "/var/run/docker.sock:/var/run/docker.sock" From 7ff9919761ee372ccdea4e4eaaecaeb9525310e2 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:15:07 +0200 Subject: [PATCH 33/68] fixed entrypoint extension --- packages/postgres-database/docker/Dockerfile | 2 +- .../postgres-database/docker/{entrypoint.sh => entrypoint.bash} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename packages/postgres-database/docker/{entrypoint.sh => entrypoint.bash} (100%) diff --git a/packages/postgres-database/docker/Dockerfile b/packages/postgres-database/docker/Dockerfile index d4218617487..d0958e6c81f 100644 --- a/packages/postgres-database/docker/Dockerfile +++ b/packages/postgres-database/docker/Dockerfile @@ -18,7 +18,7 @@ RUN apk add --no-cache \ bash \ postgresql -COPY entrypoint.sh /home/entrypoint.sh +COPY entrypoint.bash /home/entrypoint.bash ENV POSTGRES_USER=scu \ diff --git a/packages/postgres-database/docker/entrypoint.sh b/packages/postgres-database/docker/entrypoint.bash similarity index 100% rename from packages/postgres-database/docker/entrypoint.sh rename to packages/postgres-database/docker/entrypoint.bash From a3673def40c840093821f314aa47de60d5033196 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:20:10 +0200 Subject: [PATCH 34/68] add default value for traefik internal zone --- .env-devel | 2 ++ services/docker-compose.yml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.env-devel b/.env-devel index a478f55a4b2..5fdbf369962 100644 --- a/.env-devel +++ b/.env-devel @@ -45,6 +45,8 @@ STORAGE_ENDPOINT=storage:8080 TRACING_ENABLED=1 TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 +TRAEFIK_SIMCORE_ZONE=internal_simcore_stack + WEBSERVER_LOGIN_REGISTRATION_CONFIRMATION_REQUIRED=1 WEBSERVER_LOGIN_REGISTRATION_INVITATION_REQUIRED=1 # python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key())" diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 0ef2dc4d81e..3d81c5faf2c 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -241,7 +241,7 @@ services: - "--providers.docker.network=simcore_default" - "--providers.docker.exposedByDefault=false" #FIXME: the zone must be prefixed with the stack name!!! - - "--providers.docker.constraints=Label(`io.simcore.zone`, `internal_simcore_stack`)" + - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" - "--tracing=true" - "--tracing.jaeger=true" - "--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling" From 2c703c5f49591d4037b7054997ebcb251db69ab1 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:20:53 +0200 Subject: [PATCH 35/68] removed unnecessary comments --- services/docker-compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 3d81c5faf2c..b0f596fe290 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -237,10 +237,7 @@ services: - "--entryPoints.traefik_dashboard.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" - "--providers.docker.swarmMode=true" - #FIXME: the zone must be prefixed with the stack name!!! - - "--providers.docker.network=simcore_default" - "--providers.docker.exposedByDefault=false" - #FIXME: the zone must be prefixed with the stack name!!! - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" - "--tracing=true" - "--tracing.jaeger=true" From 6c5e42296768ee7ba24b8b968580f72c27059ad9 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:27:51 +0200 Subject: [PATCH 36/68] linting --- services/director/src/simcore_service_director/producer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index bae92fae638..e34a62a1efa 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -84,7 +84,7 @@ async def _create_docker_service_params( node_base_path: str, internal_network_id: Optional[str], ) -> Dict: - + # pylint: disable=too-many-statements service_parameters_labels = await _read_service_settings( app, service_key, service_tag, SERVICE_RUNTIME_SETTINGS ) From 2f39f9e063f29e07f9e8880f3f9ce3b7775bac64 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 14 Apr 2020 22:46:56 +0200 Subject: [PATCH 37/68] webserver must be in right internal network --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index b0f596fe290..d92938616fa 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -81,7 +81,7 @@ services: - node.platform.os == linux - node.role == manager labels: - - io.simcore.zone=internal_simcore_stack + - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} # gzip compression - traefik.http.middlewares.gzip.compress=true # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. From 6ed06267ec7171436856d780ec88b38be2bbff39 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 11:16:18 +0200 Subject: [PATCH 38/68] fix call in aiodocker --- services/director/src/simcore_service_director/producer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index e34a62a1efa..8a33e76ef3c 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -67,6 +67,7 @@ async def _read_service_settings( settings = ( json.loads(image_labels[settings_name]) if settings_name in image_labels else {} ) + log.debug("Retrieved %s settings: %s", settings_name, settings) return settings @@ -140,7 +141,7 @@ async def _create_docker_service_params( "type": "main" if main_service else "dependency", "io.simcore.zone": f"{config.TRAEFIK_SIMCORE_ZONE}", "traefik.enable": "true", - f"traefik.http.services.{service_name}.loadbalancer.server.port": 8080, + f"traefik.http.services.{service_name}.loadbalancer.server.port": "8080", f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", f"traefik.http.routers.{service_name}.entrypoints": "http", f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker", From b28d7a23e014eaac36c5f3e8a4bb1730ce49639c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 11:36:24 +0200 Subject: [PATCH 39/68] traefik is added into the services --- tests/swarm-deploy/test_swarm_runs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index 921412b8d00..6951fc62144 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -35,6 +35,7 @@ "rabbit", "postgres", "redis", + "traefik", ] stack_name = os.environ.get("SWARM_STACK_NAME", "simcore") From 9ae245bae61ffa2ad8489fe454c4e3dac96077d7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 11:58:25 +0200 Subject: [PATCH 40/68] unnecessary import --- tests/swarm-deploy/test_swarm_runs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index 6951fc62144..ca1f9d98f48 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -2,7 +2,6 @@ # pylint:disable=unused-argument # pylint:disable=redefined-outer-name -import asyncio import logging import os import sys From 83369368a0047d73dad363981b26995ac3288e34 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 13:40:55 +0200 Subject: [PATCH 41/68] @pcrespov verbosify --- scripts/hadolint.bash | 2 +- scripts/shellcheck.bash | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/hadolint.bash b/scripts/hadolint.bash index 2d8ef93b938..7a8370757f6 100755 --- a/scripts/hadolint.bash +++ b/scripts/hadolint.bash @@ -1,4 +1,4 @@ #!/bin/bash # dockerfile linter tool # - https://github.com/hadolint/hadolint -exec docker run --rm -i hadolint/hadolint < "$@" \ No newline at end of file +exec docker run --rm --interactive hadolint/hadolint < "$@" \ No newline at end of file diff --git a/scripts/shellcheck.bash b/scripts/shellcheck.bash index c18056ae068..2fea124a928 100755 --- a/scripts/shellcheck.bash +++ b/scripts/shellcheck.bash @@ -5,4 +5,4 @@ # - VS extension: https://github.com/timonwong/vscode-shellcheck # -exec docker run --rm -i -v "$PWD:/mnt:ro" koalaman/shellcheck:v0.7.0 "$@" +exec docker run --rm --interactive --volume "$PWD:/mnt:ro" koalaman/shellcheck:v0.7.0 "$@" From 5fffaa76e96c7f56ab346152a46ff2b73cbaa0c3 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 14:42:39 +0200 Subject: [PATCH 42/68] added env variables to allow director to pass certificates through secrets to spawned services --- .../src/simcore_service_director/config.py | 11 +++++++++++ .../src/simcore_service_director/producer.py | 15 ++++++++++++++- services/docker-compose.yml | 10 +++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/services/director/src/simcore_service_director/config.py b/services/director/src/simcore_service_director/config.py index b189d781117..2de3aa75cdf 100644 --- a/services/director/src/simcore_service_director/config.py +++ b/services/director/src/simcore_service_director/config.py @@ -25,6 +25,15 @@ DIRECTOR_REGISTRY_CACHING_TTL: int = int( os.environ.get("DIRECTOR_REGISTRY_CACHING_TTL", 15 * 60) ) + +# for passing self-signed certificate to spawned services +DIRECTOR_SELF_SIGNED_SSL_SECRET_ID: str = os.environ.get( + "DIRECTOR_SELF_SIGNED_SSL_SECRET_ID", "" +) +DIRECTOR_SELF_SIGNED_SSL_FILENAME: str = os.environ.get( + "DIRECTOR_SELF_SIGNED_SSL_FILENAME", "" +) + TRAEFIK_SIMCORE_ZONE: str = os.environ.get( "TRAEFIK_INTERNAL_ZONE", "internal_simcore_stack" ) @@ -56,6 +65,8 @@ # some services need to know the published host to be functional (paraview) # TODO: please review if needed PUBLISHED_HOST_NAME: str = os.environ.get("PUBLISHED_HOST_NAME", "") + + # used when in devel mode vs release mode NODE_SCHEMA_LOCATION: str = os.environ.get( "NODE_SCHEMA_LOCATION", f"{API_ROOT}/{API_VERSION}/schemas/node-meta-v0.0.1.json" diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 8a33e76ef3c..933ec55c640 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -95,7 +95,7 @@ async def _create_docker_service_params( service_name = registry_proxy.get_service_last_names(service_key) + "_" + node_uuid log.debug("Converting labels to docker runtime parameters") container_spec = { - "Image": "{}/{}:{}".format(config.REGISTRY_URL, service_key, service_tag), + "Image": f"{config.REGISTRY_URL}/{service_key}:{service_tag}", "Env": { **config.SERVICES_DEFAULT_ENVS, "SIMCORE_USER_ID": user_id, @@ -109,6 +109,18 @@ async def _create_docker_service_params( "Labels": {"user_id": user_id, "study_id": project_id, "node_id": node_uuid}, } + if config.DIRECTOR_SELF_SIGNED_SSL and config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID: + # Note: this is useful for S3 client in case of self signed certificate + container_spec["Env"][ + "SSL_CERT_FILE" + ] = config.DIRECTOR_SELF_SIGNED_SSL_FILENAME + container_spec["Secrets"] = [ + { + "SecretID": config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID, + "File": {"Name": config.DIRECTOR_SELF_SIGNED_SSL_FILENAME}, + } + ] + docker_params = { "auth": await _create_auth() if config.REGISTRY_AUTH else {}, "registry": config.REGISTRY_URL if config.REGISTRY_AUTH else "", @@ -148,6 +160,7 @@ async def _create_docker_service_params( }, "networks": [internal_network_id] if internal_network_id else [], } + if reverse_proxy_settings: # some services define strip_path:true if they need the path to be stripped away if ( diff --git a/services/docker-compose.yml b/services/docker-compose.yml index d92938616fa..6185942dee7 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -27,6 +27,8 @@ services: - REGISTRY_SSL=${REGISTRY_SSL} - DIRECTOR_REGISTRY_CACHING=${DIRECTOR_REGISTRY_CACHING} - DIRECTOR_REGISTRY_CACHING_TTL=${DIRECTOR_REGISTRY_CACHING_TTL} + - DIRECTOR_SELF_SIGNED_SSL_SECRET_ID=${DIRECTOR_SELF_SIGNED_SSL_SECRET_ID} + - DIRECTOR_SELF_SIGNED_SSL_FILENAME=${DIRECTOR_SELF_SIGNED_SSL_FILENAME} - POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT} - POSTGRES_USER=${POSTGRES_USER} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} @@ -226,8 +228,8 @@ services: command: - "--api=true" - "--api.dashboard=true" - - "--log.level=WARNING" - - "--accesslog=false" + - "--log.level=DEBUG" + - "--accesslog=true" - "--metrics.prometheus=true" - "--metrics.prometheus.addEntryPointsLabels=true" - "--metrics.prometheus.addServicesLabels=true" @@ -236,9 +238,11 @@ services: - "--entryPoints.http.address=:80" - "--entryPoints.traefik_dashboard.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" + # FIXME: check this one + - "--providers.docker.network=simcore_default" - "--providers.docker.swarmMode=true" - "--providers.docker.exposedByDefault=false" - - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" + # - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" - "--tracing=true" - "--tracing.jaeger=true" - "--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling" From 72629c6b2dac43e0749e5669d4edce41cb1ac187 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 14:43:18 +0200 Subject: [PATCH 43/68] traefik logs to warning --- services/docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 6185942dee7..7d7803e412d 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -228,8 +228,8 @@ services: command: - "--api=true" - "--api.dashboard=true" - - "--log.level=DEBUG" - - "--accesslog=true" + - "--log.level=WARNING" + - "--accesslog=false" - "--metrics.prometheus=true" - "--metrics.prometheus.addEntryPointsLabels=true" - "--metrics.prometheus.addServicesLabels=true" From 00f41f9b88122d5889093098d372f30fb7ae3802 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 14:44:20 +0200 Subject: [PATCH 44/68] check for io.simcore.zone label --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 7d7803e412d..8cacd6183a1 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -242,7 +242,7 @@ services: - "--providers.docker.network=simcore_default" - "--providers.docker.swarmMode=true" - "--providers.docker.exposedByDefault=false" - # - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" + - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" - "--tracing=true" - "--tracing.jaeger=true" - "--tracing.jaeger.samplingServerURL=http://jaeger:5778/sampling" From 3cd3925d93e12d6c16985ee91faf901b86e829c4 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 14:47:00 +0200 Subject: [PATCH 45/68] define the default network for traefik using the stack name --- services/docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 8cacd6183a1..5ff44cf4add 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -238,8 +238,7 @@ services: - "--entryPoints.http.address=:80" - "--entryPoints.traefik_dashboard.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" - # FIXME: check this one - - "--providers.docker.network=simcore_default" + - "--providers.docker.network=${SWARM_STACK_NAME}_default" - "--providers.docker.swarmMode=true" - "--providers.docker.exposedByDefault=false" - "--providers.docker.constraints=Label(`io.simcore.zone`, `${TRAEFIK_SIMCORE_ZONE}`)" From fd326dd960b058e2eb6267ad1481d8ea13caceb8 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 14:51:37 +0200 Subject: [PATCH 46/68] typo --- services/director/src/simcore_service_director/producer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 933ec55c640..b3dadef137a 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -109,7 +109,10 @@ async def _create_docker_service_params( "Labels": {"user_id": user_id, "study_id": project_id, "node_id": node_uuid}, } - if config.DIRECTOR_SELF_SIGNED_SSL and config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID: + if ( + config.DIRECTOR_SELF_SIGNED_SSL_FILENAME + and config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID + ): # Note: this is useful for S3 client in case of self signed certificate container_spec["Env"][ "SSL_CERT_FILE" From 4099c6ab6dec3405db9667dbc9d82a88353b7e97 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 20:09:49 +0200 Subject: [PATCH 47/68] added DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME env variable --- services/director/src/simcore_service_director/config.py | 3 +++ .../director/src/simcore_service_director/producer.py | 9 ++++++++- services/docker-compose.yml | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/config.py b/services/director/src/simcore_service_director/config.py index 2de3aa75cdf..1be775d8cef 100644 --- a/services/director/src/simcore_service_director/config.py +++ b/services/director/src/simcore_service_director/config.py @@ -30,6 +30,9 @@ DIRECTOR_SELF_SIGNED_SSL_SECRET_ID: str = os.environ.get( "DIRECTOR_SELF_SIGNED_SSL_SECRET_ID", "" ) +DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME: str = os.environ.get( + "DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME", "" +) DIRECTOR_SELF_SIGNED_SSL_FILENAME: str = os.environ.get( "DIRECTOR_SELF_SIGNED_SSL_FILENAME", "" ) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index b3dadef137a..7765d7be3a3 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -112,6 +112,7 @@ async def _create_docker_service_params( if ( config.DIRECTOR_SELF_SIGNED_SSL_FILENAME and config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID + and config.DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME ): # Note: this is useful for S3 client in case of self signed certificate container_spec["Env"][ @@ -120,7 +121,13 @@ async def _create_docker_service_params( container_spec["Secrets"] = [ { "SecretID": config.DIRECTOR_SELF_SIGNED_SSL_SECRET_ID, - "File": {"Name": config.DIRECTOR_SELF_SIGNED_SSL_FILENAME}, + "SecretName": config.DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME, + "File": { + "Name": config.DIRECTOR_SELF_SIGNED_SSL_FILENAME, + "Mode": 444, + "UID": "0", + "GID": "0", + }, } ] diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 5ff44cf4add..c6e9cf0d62a 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -28,6 +28,7 @@ services: - DIRECTOR_REGISTRY_CACHING=${DIRECTOR_REGISTRY_CACHING} - DIRECTOR_REGISTRY_CACHING_TTL=${DIRECTOR_REGISTRY_CACHING_TTL} - DIRECTOR_SELF_SIGNED_SSL_SECRET_ID=${DIRECTOR_SELF_SIGNED_SSL_SECRET_ID} + - DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME=${DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME} - DIRECTOR_SELF_SIGNED_SSL_FILENAME=${DIRECTOR_SELF_SIGNED_SSL_FILENAME} - POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT} - POSTGRES_USER=${POSTGRES_USER} From f8e57dfda732164b31bcdd16dcd5d3d37793223e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 15 Apr 2020 20:09:55 +0200 Subject: [PATCH 48/68] typo --- packages/postgres-database/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/postgres-database/docker/Dockerfile b/packages/postgres-database/docker/Dockerfile index d0958e6c81f..e2cc5d1b0e9 100644 --- a/packages/postgres-database/docker/Dockerfile +++ b/packages/postgres-database/docker/Dockerfile @@ -27,5 +27,5 @@ ENV POSTGRES_USER=scu \ POSTGRES_PORT=5432 \ POSTGRES_DB=simcoredb -ENTRYPOINT [ "/bin/sh", "/home/entrypoint.sh" ] +ENTRYPOINT [ "/bin/sh", "/home/entrypoint.bash" ] CMD [ "sc-pg", "upgrade" ] From d7e30cd7a48d8aceaa6183c63d7ed012cee58b3b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 08:33:30 +0200 Subject: [PATCH 49/68] corrected getting wrong env --- services/director/src/simcore_service_director/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/config.py b/services/director/src/simcore_service_director/config.py index 1be775d8cef..bf0f13e76c5 100644 --- a/services/director/src/simcore_service_director/config.py +++ b/services/director/src/simcore_service_director/config.py @@ -38,7 +38,7 @@ ) TRAEFIK_SIMCORE_ZONE: str = os.environ.get( - "TRAEFIK_INTERNAL_ZONE", "internal_simcore_stack" + "TRAEFIK_SIMCORE_ZONE", "internal_simcore_stack" ) APP_REGISTRY_CACHE_DATA_KEY: str = __name__ + "_registry_cache_data" From 5be0fe13803497172d8d4c6fd7a50c67373d6d9b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 08:51:49 +0200 Subject: [PATCH 50/68] name of routers must be prefixed with swarm stack name --- services/docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index c6e9cf0d62a..29a243eb438 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -90,10 +90,10 @@ services: # ssl header necessary so that socket.io upgrades correctly from polling to websocket mode. the middleware must be attached to the right connection. - traefik.http.middlewares.simcore_sslheader.headers.customrequestheaders.X-Forwarded-Proto=http - traefik.enable=true - - traefik.http.services.webserver.loadbalancer.server.port=8080 - - traefik.http.routers.webserver.rule=hostregexp(`{host:.+}`) - - traefik.http.routers.webserver.entrypoints=http - - traefik.http.routers.webserver.middlewares=gzip@docker, simcore_sslheader@docker + - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.server.port=8080 + - traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=hostregexp(`{host:.+}`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=gzip@docker, simcore_sslheader@docker networks: - default - interactive_services_subnet From af29c0c00cab1c8c73f15b6fede21caae2b8c900 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 08:58:19 +0200 Subject: [PATCH 51/68] set priority to 1 on webserver --- services/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 29a243eb438..b2a143f0ee1 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -93,6 +93,7 @@ services: - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.server.port=8080 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=hostregexp(`{host:.+}`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=1 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=gzip@docker, simcore_sslheader@docker networks: - default From 2b23c0809fa33cd21ed6e4b0dbf9614066d389db Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 09:28:48 +0200 Subject: [PATCH 52/68] set traefik priority to 10 --- services/director/src/simcore_service_director/producer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index 7765d7be3a3..cc1ce86dbde 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -166,6 +166,7 @@ async def _create_docker_service_params( f"traefik.http.services.{service_name}.loadbalancer.server.port": "8080", f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", f"traefik.http.routers.{service_name}.entrypoints": "http", + f"traefik.http.routers.{service_name}.priority": "10", f"traefik.http.routers.{service_name}.middlewares": f"gzip@docker", }, "networks": [internal_network_id] if internal_network_id else [], From 27814fe110dd041b6109bf41d4cd2574a693b6fc Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 10:34:53 +0200 Subject: [PATCH 53/68] remove access to port 9081 since now it goes through traefik --- services/docker-compose.local.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.local.yml b/services/docker-compose.local.yml index 56ac9ecb0bb..e875c962fb7 100644 --- a/services/docker-compose.local.yml +++ b/services/docker-compose.local.yml @@ -40,7 +40,7 @@ services: environment: - SC_BOOT_MODE=${SC_BOOT_MODE:-default} ports: - - ${SIMCORE_PORT:-9081}:8080 + - "8080" - "3001:3000" postgres: From 9363cb0195aca53636a90854c8cedb52223e0302 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 10:39:17 +0200 Subject: [PATCH 54/68] only enable reverse proxy if main service --- services/director/src/simcore_service_director/producer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director/src/simcore_service_director/producer.py b/services/director/src/simcore_service_director/producer.py index cc1ce86dbde..ad554302156 100644 --- a/services/director/src/simcore_service_director/producer.py +++ b/services/director/src/simcore_service_director/producer.py @@ -162,7 +162,7 @@ async def _create_docker_service_params( "user_id": user_id, "type": "main" if main_service else "dependency", "io.simcore.zone": f"{config.TRAEFIK_SIMCORE_ZONE}", - "traefik.enable": "true", + "traefik.enable": "true" if main_service else "false", f"traefik.http.services.{service_name}.loadbalancer.server.port": "8080", f"traefik.http.routers.{service_name}.rule": f"PathPrefix(`/x/{node_uuid}`)", f"traefik.http.routers.{service_name}.entrypoints": "http", From 3063d891cddc1f6e5b5f4f2ee95988ec033f83af Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 10:57:08 +0200 Subject: [PATCH 55/68] servers on 80 now --- tests/swarm-deploy/test_swarm_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index ca1f9d98f48..aa34982f2cd 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -144,7 +144,7 @@ def test_core_service_running( def test_check_serve_root(make_up_prod: Dict): - req = urllib.request.Request("http://127.0.0.1:9081/") + req = urllib.request.Request("http://127.0.0.1:80/") try: resp = urllib.request.urlopen(req) charset = resp.info().get_content_charset() From d505167aa3a61288345ea5f2ee2a525b1b5d0290 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 11:33:55 +0200 Subject: [PATCH 56/68] set entrypoint port back to 9081 --- services/docker-compose.local.yml | 2 +- services/docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/docker-compose.local.yml b/services/docker-compose.local.yml index e875c962fb7..b5a2e8d655a 100644 --- a/services/docker-compose.local.yml +++ b/services/docker-compose.local.yml @@ -40,7 +40,7 @@ services: environment: - SC_BOOT_MODE=${SC_BOOT_MODE:-default} ports: - - "8080" + - "9081:8080" - "3001:3000" postgres: diff --git a/services/docker-compose.yml b/services/docker-compose.yml index b2a143f0ee1..efd24db20a6 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -237,7 +237,7 @@ services: - "--metrics.prometheus.addServicesLabels=true" - "--entryPoints.metrics.address=:8082" - "--metrics.prometheus.entryPoint=metrics" - - "--entryPoints.http.address=:80" + - "--entryPoints.http.address=:9081" - "--entryPoints.traefik_dashboard.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" - "--providers.docker.network=${SWARM_STACK_NAME}_default" From b6ff4dd0f16bf13ac6c236eb71e56f5ea9a1ef89 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 11:34:47 +0200 Subject: [PATCH 57/68] back to 9081 --- tests/swarm-deploy/test_swarm_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index aa34982f2cd..ca1f9d98f48 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -144,7 +144,7 @@ def test_core_service_running( def test_check_serve_root(make_up_prod: Dict): - req = urllib.request.Request("http://127.0.0.1:80/") + req = urllib.request.Request("http://127.0.0.1:9081/") try: resp = urllib.request.urlopen(req) charset = resp.info().get_content_charset() From aa7db525029739445738c41dc285b1b11dc28cff Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 18:53:30 +0200 Subject: [PATCH 58/68] @pcrespov review: fix traefik version --- services/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index efd24db20a6..825b0edfd15 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -225,7 +225,7 @@ services: - default traefik: - image: traefik:v2.2 + image: traefik:v2.2.0 init: true command: - "--api=true" From 9c86555f187b9d624954e4ba84cbee85d63154c6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 18:54:15 +0200 Subject: [PATCH 59/68] @pcrespov remove comments --- services/docker-compose.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 825b0edfd15..c1e0b709c8d 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -257,15 +257,6 @@ services: placement: constraints: - node.role == manager - # labels: - # - io.simcore.zone=internal_simcore_stack - # - traefik.enable=true - # traefik UI - # - traefik.http.routers.api_internal.service=api@internal - # - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) - # - traefik.http.routers.api_internal.entrypoints=traefik_dashboard - # - traefik.http.routers.api_internal.middlewares=gzip@docker - # - traefik.http.services.api_internal.loadbalancer.server.port=8080 networks: - default - interactive_services_subnet From 68dd0c51ce5f59feadefc401501e986c467e0eef Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 18:56:56 +0200 Subject: [PATCH 60/68] @pcrespov review: disable webserver reverse-proxy in prod as well --- .../simcore_service_webserver/config/server-defaults.yaml | 8 ++++++-- .../config/server-docker-prod.yaml | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/config/server-defaults.yaml b/services/web/server/src/simcore_service_webserver/config/server-defaults.yaml index 257bf9eb7c7..e2040fe8b39 100644 --- a/services/web/server/src/simcore_service_webserver/config/server-defaults.yaml +++ b/services/web/server/src/simcore_service_webserver/config/server-defaults.yaml @@ -1,4 +1,4 @@ -version: '1.0' +version: "1.0" main: client_outdir: /usr/src/app/client host: 127.0.0.1 @@ -15,6 +15,10 @@ director: host: director port: 8001 version: v0 +application_proxy: + enabled: False +reverse_proxy: + enabled: False socketio: enabled: True db: @@ -50,7 +54,7 @@ login: registration_invitation_required: False registration_confirmation_required: True smtp: - sender: 'OSPARC support ' + sender: "OSPARC support " host: mail.foo.com port: 25 tls: False diff --git a/services/web/server/src/simcore_service_webserver/config/server-docker-prod.yaml b/services/web/server/src/simcore_service_webserver/config/server-docker-prod.yaml index 42c4214f993..09a188686db 100644 --- a/services/web/server/src/simcore_service_webserver/config/server-docker-prod.yaml +++ b/services/web/server/src/simcore_service_webserver/config/server-docker-prod.yaml @@ -53,7 +53,7 @@ login: registration_invitation_required: ${WEBSERVER_LOGIN_REGISTRATION_INVITATION_REQUIRED} registration_confirmation_required: ${WEBSERVER_LOGIN_REGISTRATION_CONFIRMATION_REQUIRED} smtp: - sender: 'OSPARC support ' + sender: "OSPARC support " host: ${SMTP_HOST} port: ${SMTP_PORT} tls: ${SMTP_TLS_ENABLED} @@ -67,6 +67,10 @@ catalog: host: ${CATALOG_HOST} port: 8000 version: v0 +application_proxy: + enabled: False +reverse_proxy: + enabled: False rest: version: v0 projects: @@ -79,4 +83,3 @@ activity: prometheus_host: ${WEBSERVER_PROMETHEUS_HOST} prometheus_port: ${WEBSERVER_PROMETHEUS_PORT} prometheus_api_version: ${WEBSERVER_PROMETHEUS_API_VERSION} -... From e94f3904ce15945ad4c2814ac75f8ee106d4b07c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 21:40:07 +0200 Subject: [PATCH 61/68] traefik UI/api available in both dev/prod changed default port to 80 set published port of traefik to 9081 set published port of webserver to random --- services/docker-compose.devel.yml | 13 ------------- services/docker-compose.local.yml | 28 ++++++++++++++++++++++++++-- services/docker-compose.yml | 4 ++-- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/services/docker-compose.devel.yml b/services/docker-compose.devel.yml index b1e8af2375e..3fc1118a882 100644 --- a/services/docker-compose.devel.yml +++ b/services/docker-compose.devel.yml @@ -53,16 +53,3 @@ services: - ../packages:/devel/packages environment: - SC_BOOT_MODE=debug-ptvsd - - traefik: - deploy: - # currently only available in devel mode as this conflicts with the external traefik dashboard - labels: - - io.simcore.zone=internal_simcore_stack - - traefik.enable=true - # traefik UI - - traefik.http.routers.api_internal.service=api@internal - - traefik.http.routers.api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) - - traefik.http.routers.api_internal.entrypoints=traefik_dashboard - - traefik.http.routers.api_internal.middlewares=gzip@docker - - traefik.http.services.api_internal.loadbalancer.server.port=8080 diff --git a/services/docker-compose.local.yml b/services/docker-compose.local.yml index b5a2e8d655a..e327bbbd142 100644 --- a/services/docker-compose.local.yml +++ b/services/docker-compose.local.yml @@ -40,7 +40,7 @@ services: environment: - SC_BOOT_MODE=${SC_BOOT_MODE:-default} ports: - - "9081:8080" + - "8080" - "3001:3000" postgres: @@ -81,6 +81,30 @@ services: traefik: ports: - target: 80 - published: 80 + published: 9081 - target: 8080 published: 8080 + deploy: + labels: + # traefik dashboard UI accessible through http://127.0.0.1:8080/dashboard/ + # traefik REST API accessible through for example http://127.0.0.1:8080/api/http/routers + - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} + - traefik.enable=true + - traefik.http.routers.${SWARM_STACK_NAME}_api_internal.service=api@internal + - traefik.http.routers.${SWARM_STACK_NAME}_api_internal.rule=PathPrefix(`/dashboard`) || PathPrefix(`/api`) + - traefik.http.routers.${SWARM_STACK_NAME}_api_internal.entrypoints=traefik_monitor + - traefik.http.routers.${SWARM_STACK_NAME}_api_internal.middlewares=gzip@docker + - traefik.http.services.${SWARM_STACK_NAME}_api_internal.loadbalancer.server.port=8080 + + whoami: + image: "containous/whoami" + # NOTE: helper service allows to better understand how the host gets forwarded inside the simcore stack + deploy: + labels: + # whoami accessible through http://127.0.0.1:8080/whoami + - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} + - traefik.enable=true + - traefik.http.services.${SWARM_STACK_NAME}_whoami.loadbalancer.server.port=80 + - traefik.http.routers.${SWARM_STACK_NAME}_whoami.rule=PathPrefix(`/whoami`) + - traefik.http.routers.${SWARM_STACK_NAME}_whoami.entrypoints=traefik_monitor + - traefik.http.routers.${SWARM_STACK_NAME}_whoami.middlewares=gzip@docker diff --git a/services/docker-compose.yml b/services/docker-compose.yml index c1e0b709c8d..4311ace964a 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -237,8 +237,8 @@ services: - "--metrics.prometheus.addServicesLabels=true" - "--entryPoints.metrics.address=:8082" - "--metrics.prometheus.entryPoint=metrics" - - "--entryPoints.http.address=:9081" - - "--entryPoints.traefik_dashboard.address=:8080" + - "--entryPoints.http.address=:80" + - "--entryPoints.traefik_monitor.address=:8080" - "--providers.docker.endpoint=unix:///var/run/docker.sock" - "--providers.docker.network=${SWARM_STACK_NAME}_default" - "--providers.docker.swarmMode=true" From 16c0c8aaf5cdb1238b1363ae38912b77f25957e3 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 21:40:18 +0200 Subject: [PATCH 62/68] for devel do not ask for an invitation --- .env-devel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env-devel b/.env-devel index 5fdbf369962..63f00232450 100644 --- a/.env-devel +++ b/.env-devel @@ -47,8 +47,8 @@ TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 TRAEFIK_SIMCORE_ZONE=internal_simcore_stack -WEBSERVER_LOGIN_REGISTRATION_CONFIRMATION_REQUIRED=1 -WEBSERVER_LOGIN_REGISTRATION_INVITATION_REQUIRED=1 +WEBSERVER_LOGIN_REGISTRATION_CONFIRMATION_REQUIRED=0 +WEBSERVER_LOGIN_REGISTRATION_INVITATION_REQUIRED=0 # python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key())" WEBSERVER_SESSION_SECRET_KEY=REPLACE ME with a key of at least length 32. WEBSERVER_STUDIES_ACCESS_ENABLED=0 From dfa78806a331d088a97e4f487d5dda7de7fd0fb6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Apr 2020 21:59:17 +0200 Subject: [PATCH 63/68] missing whoami in tests --- tests/swarm-deploy/test_swarm_runs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index ca1f9d98f48..0ba775c38d8 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -35,6 +35,7 @@ "postgres", "redis", "traefik", + "whoami", ] stack_name = os.environ.get("SWARM_STACK_NAME", "simcore") From a8bb468741774f3c6288fa5cd36e89185298f693 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Apr 2020 09:03:04 +0200 Subject: [PATCH 64/68] codecov token is not required anymore for public repos --- .github/workflows/ci-testing-deploy.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index 4f67b7e893a..a862c5e279f 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -16,7 +16,6 @@ env: DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} - CODECOV_TOKEN: 0e81ae5f-1909-4035-921c-571693fdafd1 CC_TEST_REPORTER_ID: 21a72eb30476c870140b1576258873a41be6692f71bd9aebe812174b7d8f4b4e #enable buildkit DOCKER_BUILDKIT: 1 @@ -92,7 +91,6 @@ jobs: run: ./ci/github/unit-testing/api-gateway.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -140,7 +138,6 @@ jobs: run: ./ci/github/unit-testing/catalog.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -188,7 +185,6 @@ jobs: run: ./ci/github/unit-testing/director.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -236,7 +232,6 @@ jobs: run: ./ci/github/unit-testing/sidecar.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -283,7 +278,6 @@ jobs: # no coverage here?? # - uses: codecov/codecov-action@v1 # with: - # token: ${{ env.CODECOV_TOKEN }} #required # flags: unittests #optional unit-test-python-linting: @@ -355,7 +349,6 @@ jobs: run: ./ci/github/unit-testing/service-library.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -403,7 +396,6 @@ jobs: run: ./ci/github/unit-testing/simcore-sdk.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -451,7 +443,6 @@ jobs: run: ./ci/github/unit-testing/storage.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -499,7 +490,6 @@ jobs: run: ./ci/github/unit-testing/webserver.bash test - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: unittests #optional - name: prepare codeclimate coverage file run: | @@ -587,7 +577,6 @@ jobs: run: ./ci/github/integration-testing/webserver.bash clean_up - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: integrationtests #optional - name: prepare codeclimate coverage file run: | @@ -645,7 +634,6 @@ jobs: run: ./ci/github/integration-testing/sidecar.bash clean_up - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: integrationtests #optional - name: prepare codeclimate coverage file run: | @@ -703,7 +691,6 @@ jobs: run: ./ci/github/integration-testing/simcore-sdk.bash clean_up - uses: codecov/codecov-action@v1 with: - token: ${{ env.CODECOV_TOKEN }} #required flags: integrationtests - name: prepare codeclimate coverage file run: | From aa811c3ed4cf76d03e1f4aac52b470546af712b0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Apr 2020 10:23:47 +0200 Subject: [PATCH 65/68] added traefik service fixture --- .../src/pytest_simcore/traefik_service.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 packages/pytest-simcore/src/pytest_simcore/traefik_service.py diff --git a/packages/pytest-simcore/src/pytest_simcore/traefik_service.py b/packages/pytest-simcore/src/pytest_simcore/traefik_service.py new file mode 100644 index 00000000000..694c6efd383 --- /dev/null +++ b/packages/pytest-simcore/src/pytest_simcore/traefik_service.py @@ -0,0 +1,50 @@ +# pylint:disable=unused-variable +# pylint:disable=unused-argument +# pylint:disable=redefined-outer-name + +import os +from typing import Dict, Tuple + +import aiohttp +import pytest +import tenacity +from yarl import URL + +from servicelib.minio_utils import MinioRetryPolicyUponInitialization + +from .helpers.utils_docker import get_service_published_port + + +@pytest.fixture(scope="module") +def traefik_endpoints(docker_stack: Dict, devel_environ: Dict) -> Tuple[URL, URL]: + """get the endpoint for the given simcore_service. + NOTE: simcore_service defined as a parametrization + """ + assert f"simcore_traefik" in docker_stack["services"] + api_endpoint = f"127.0.0.1:{get_service_published_port('traefik', 8080)}" + webserver_endpoint = f"127.0.0.1:{get_service_published_port('traefik', 80)}" + return (URL(f"http://{api_endpoint}"), URL(f"http://{webserver_endpoint}")) + + +@pytest.fixture(scope="function") +async def traefik_service( + loop, traefik_endpoints: Tuple[URL, URL], docker_stack: Dict +) -> URL: + api_endpoint, webserver_endpoint = traefik_endpoints + await wait_till_traefik_responsive(api_endpoint) + yield traefik_endpoints + + +# HELPERS -- + +# TODO: this can be used by ANY of the simcore services! +@tenacity.retry(**MinioRetryPolicyUponInitialization().kwargs) +async def wait_till_traefik_responsive(api_endpoint: URL): + async with aiohttp.ClientSession() as session: + async with session.get(api_endpoint.with_path("/api/http/routers")) as resp: + assert resp.status == 200 + data = await resp.json() + for proxied_service in data: + assert "service" in proxied_service + if "webserver" in proxied_service["service"]: + assert proxied_service["status"] == "enabled" From b422a7b25f448f49137b08b5ac92a51d542459c0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Apr 2020 10:24:03 +0200 Subject: [PATCH 66/68] increase trials for when machine is a bit weak --- packages/service-library/src/servicelib/minio_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/src/servicelib/minio_utils.py b/packages/service-library/src/servicelib/minio_utils.py index 32ace235d87..211818fe923 100644 --- a/packages/service-library/src/servicelib/minio_utils.py +++ b/packages/service-library/src/servicelib/minio_utils.py @@ -11,7 +11,7 @@ class MinioRetryPolicyUponInitialization: """ WAIT_SECS = 2 - ATTEMPTS_COUNT = 20 + ATTEMPTS_COUNT = 40 def __init__(self, logger: Optional[logging.Logger] = None): logger = logger or log From a62691558d7a54e40edafbcd4859c55277ab5f9e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Apr 2020 10:24:11 +0200 Subject: [PATCH 67/68] use traefik service --- tests/swarm-deploy/conftest.py | 1 + tests/swarm-deploy/test_swarm_runs.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/swarm-deploy/conftest.py b/tests/swarm-deploy/conftest.py index ef42c917849..d722a4eeed6 100644 --- a/tests/swarm-deploy/conftest.py +++ b/tests/swarm-deploy/conftest.py @@ -19,6 +19,7 @@ "pytest_simcore.rabbit_service", "pytest_simcore.postgres_service", "pytest_simcore.minio_service", + "pytest_simcore.traefik_service" # "pytest_simcore.simcore_storage_service", ] log = logging.getLogger(__name__) diff --git a/tests/swarm-deploy/test_swarm_runs.py b/tests/swarm-deploy/test_swarm_runs.py index 0ba775c38d8..6bffe2145ba 100644 --- a/tests/swarm-deploy/test_swarm_runs.py +++ b/tests/swarm-deploy/test_swarm_runs.py @@ -14,6 +14,7 @@ import pytest from docker import DockerClient from docker.models.services import Service +from yarl import URL logger = logging.getLogger(__name__) @@ -144,7 +145,7 @@ def test_core_service_running( ) -def test_check_serve_root(make_up_prod: Dict): +def test_check_serve_root(loop, make_up_prod: Dict, traefik_service: URL): req = urllib.request.Request("http://127.0.0.1:9081/") try: resp = urllib.request.urlopen(req) From c2d57c10229e63c3a7002aa58f1640778261604a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Apr 2020 10:32:10 +0200 Subject: [PATCH 68/68] lint --- packages/pytest-simcore/src/pytest_simcore/traefik_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/pytest-simcore/src/pytest_simcore/traefik_service.py b/packages/pytest-simcore/src/pytest_simcore/traefik_service.py index 694c6efd383..4cbdccab45c 100644 --- a/packages/pytest-simcore/src/pytest_simcore/traefik_service.py +++ b/packages/pytest-simcore/src/pytest_simcore/traefik_service.py @@ -2,7 +2,6 @@ # pylint:disable=unused-argument # pylint:disable=redefined-outer-name -import os from typing import Dict, Tuple import aiohttp