Skip to content

[maintenance] diagnose long response latency #1421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ SWARM_HOSTS = $(shell docker node ls --format="{{.Hostname}}" 2>$(if $(IS_WIN),N
.PHONY: build build-nc rebuild build-devel build-devel-nc build-devel-kit build-devel-x build-cache build-cache-kit build-cache-x build-cache-nc build-kit build-x

define _docker_compose_build
export BUILD_TARGET=$(if $(findstring -devel,$@),development,$(if $(findstring -cache,$@),cache,production)); \
export BUILD_TARGET=$(if $(findstring -devel,$@),development,$(if $(findstring -cache,$@),cache,production));\
$(if $(findstring -x,$@),\
pushd services; docker buildx bake --file docker-compose-build.yml; popd;,\
docker-compose -f services/docker-compose-build.yml build $(if $(findstring -nc,$@),--no-cache,) --parallel;\
docker-compose -f services/docker-compose-build.yml build $(if $(findstring -nc,$@),--no-cache,) $(if $(target),,--parallel)\
)
endef

Expand Down
17 changes: 16 additions & 1 deletion api/specs/webserver/openapi-diagnostics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,22 @@ paths:
get:
tags:
- maintenance
summary: Service health check
summary: run check
operationId: check_running
responses:
"200":
description: Service information
content:
application/json:
schema:
$ref: "./components/schemas/health_check.yaml#/HealthCheckEnveloped"
default:
$ref: "#/components/responses/DefaultErrorResponse"
/health:
get:
tags:
- maintenance
summary: health check
operationId: check_health
responses:
"200":
Expand Down
3 changes: 3 additions & 0 deletions api/specs/webserver/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ paths:
# DIAGNOSTICS ---------------------------------------------------------
/:
$ref: "./openapi-diagnostics.yaml#/paths/~1"

/health:
$ref: "./openapi-diagnostics.yaml#/paths/~1health"

/check/{action}:
$ref: "./openapi-diagnostics.yaml#/paths/~1check~1{action}"
Expand Down
2 changes: 1 addition & 1 deletion packages/postgres-database/requirements/_test.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
-r _migration.txt

# fixtures
pyaml
pyyaml
aiopg[sa]
faker

Expand Down
11 changes: 7 additions & 4 deletions packages/postgres-database/requirements/_test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile
# To update, run:
#
# pip-compile --build-isolation --output-file=_test.txt _test.in
# pip-compile requirements/_test.in
#
aiohttp==3.6.2 # via pytest-aiohttp
aiopg[sa]==1.0.0 # via -r requirements/_test.in
Expand All @@ -21,7 +21,9 @@ coveralls==1.11.1 # via -r requirements/_test.in
docker==4.2.0 # via -r requirements/_migration.txt
docopt==0.6.2 # via coveralls
faker==4.0.2 # via -r requirements/_test.in
idna-ssl==1.1.0 # via aiohttp
idna==2.8 # via -r requirements/_migration.txt, requests, yarl
importlib-metadata==1.6.0 # via pluggy, pytest
isort==4.3.21 # via pylint
lazy-object-proxy==1.4.3 # via astroid
mako==1.0.12 # via -r requirements/_migration.txt, alembic
Expand All @@ -35,7 +37,6 @@ pip-tools==4.5.1 # via -r requirements/../../../requirements.txt
pluggy==0.13.1 # via pytest
psycopg2-binary==2.8.4 # via -r requirements/_migration.txt, aiopg, sqlalchemy
py==1.8.1 # via pytest
pyaml==20.3.1 # via -r requirements/_test.in
pylint==2.4.4 # via -r requirements/../../../requirements.txt, -r requirements/_test.in
pyparsing==2.4.6 # via packaging
pytest-aiohttp==0.3.0 # via -r requirements/_test.in
Expand All @@ -46,7 +47,7 @@ pytest-runner==5.2 # via -r requirements/_test.in
pytest==5.3.5 # via -r requirements/_test.in, pytest-aiohttp, pytest-cov, pytest-instafail
python-dateutil==2.8.0 # via -r requirements/_migration.txt, alembic, faker
python-editor==1.0.4 # via -r requirements/_migration.txt, alembic
pyyaml==5.3 # via pyaml
pyyaml==5.3.1 # via -r requirements/_test.in
regex==2020.2.20 # via black
requests==2.22.0 # via -r requirements/_migration.txt, coveralls, docker
rope==0.16.0 # via -r requirements/../../../requirements.txt
Expand All @@ -55,9 +56,11 @@ sqlalchemy[postgresql_psycopg2binary]==1.3.5 # via -r requirements/_migration.t
tenacity==6.1.0 # via -r requirements/_migration.txt
text-unidecode==1.3 # via faker
toml==0.10.0 # via black
typed-ast==1.4.1 # via black
typed-ast==1.4.1 # via astroid, black
typing-extensions==3.7.4.2 # via aiohttp
urllib3==1.25.8 # via -r requirements/_migration.txt, requests
wcwidth==0.1.8 # via pytest
websocket-client==0.56.0 # via -r requirements/_migration.txt, docker
wrapt==1.11.2 # via astroid
yarl==1.4.2 # via -r requirements/_migration.txt, aiohttp
zipp==3.1.0 # via importlib-metadata
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
async def assert_status(
response: web.Response, expected_cls: web.HTTPException, expected_msg: str = None
):
"""
Asserts for enveloped responses
"""
data, error = unwrap_envelope(await response.json())
assert response.status == expected_cls.status_code, (data, error)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def app_module_setup(

See packages/service-library/tests/test_application_setup.py

:param module_name: typically __name__ (automaticaly removes '.__init__')
:param module_name: typically __name__
:param depends: list of module_names that must be called first, defaults to None
:param config_section: explicit configuration section, defaults to None (i.e. the name of the module, or last entry of the name if dotted)
:param config_enabled: option in config to enable, defaults to None which is '$(module-section).enabled' (config_section and config_enabled are mutually exclusive)
Expand Down Expand Up @@ -122,7 +122,7 @@ def _get(cfg_, parts):
is_enabled = _get(cfg, config_enabled.split("."))
except KeyError as ee:
raise ApplicationSetupError(
f"Cannot find '{config_enabled}' in app config at [ {ee} ]"
f"Cannot find required option '{config_enabled}' in app config's section '{ee}'"
)

if not is_enabled:
Expand Down
46 changes: 30 additions & 16 deletions packages/service-library/src/servicelib/monitoring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""

UNDER DEVELOPMENT for issue #784
UNDER DEVELOPMENT for issue #784 (see web/server/diagnostics_monitoring.py)

Based on https://github.com/amitsaha/aiohttp-prometheus

Expand All @@ -20,6 +20,14 @@
log = logging.getLogger(__name__)


async def metrics_handler(_request: web.Request):
# TODO: prometheus_client.generate_latest blocking! no asyhc solutin?
# TODO: prometheus_client access to a singleton registry! can be instead created and pass to every metric wrapper
resp = web.Response(body=prometheus_client.generate_latest())
resp.content_type = CONTENT_TYPE_LATEST
return resp


def middleware_factory(app_name):
@web.middleware
async def middleware_handler(request: web.Request, handler):
Expand Down Expand Up @@ -68,27 +76,33 @@ async def middleware_handler(request: web.Request, handler):

return resp

middleware_handler.__middleware_name__ = __name__
middleware_handler.__middleware_name__ = __name__ # SEE check_outermost_middleware
return middleware_handler


async def metrics(_request):
# TODO: NOT async!
# prometheus_client access to a singleton registry!
resp = web.Response(body=prometheus_client.generate_latest())
resp.content_type = CONTENT_TYPE_LATEST
return resp
async def check_outermost_middleware(
app: web.Application, *, log_failure: bool = True
) -> bool:
try:
ok = app.middlewares[0].__middleware_name__ == __name__
except (IndexError, AttributeError):
ok = False

if not ok and log_failure:

def _view(m) -> str:
try:
return f"{m.__middleware_name__} [{m}]"
except AttributeError:
return str(m)

async def check_outermost_middleware(app: web.Application):
m = app.middlewares[0]
ok = m and hasattr(m, "__middleware_name__") and m.__middleware_name__ == __name__
if not ok:
# TODO: name all middleware and list middleware in log
log.critical(
"Monitoring middleware expected in the outermost layer."
"TIP: Check setup order"
"Monitoring middleware expected in the outermost layer. "
"Middleware stack: %s. "
"TIP: Check setup order",
[_view(m) for m in app.middlewares],
)
return ok


def setup_monitoring(app: web.Application, app_name: str):
Expand Down Expand Up @@ -118,7 +132,7 @@ def setup_monitoring(app: web.Application, app_name: str):
app.middlewares.insert(0, middleware_factory(app_name))

# FIXME: this in the front-end has to be protected!
app.router.add_get("/metrics", metrics)
app.router.add_get("/metrics", metrics_handler)

# Checks that middleware is in the outermost layer
app.on_startup.append(check_outermost_middleware)
Expand Down
70 changes: 46 additions & 24 deletions packages/service-library/src/servicelib/rest_middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from .rest_utils import EnvelopeFactory
from .rest_validators import OpenApiValidator
from .utils import is_production_environ

DEFAULT_API_VERSION = "v0"

Expand All @@ -31,26 +32,34 @@ def is_api_request(request: web.Request, api_version: str) -> bool:
return request.path.startswith(base_path)


def _process_and_raise_unexpected_error(request: web.BaseRequest, err: Exception):
# FIXME: send info + trace to client ONLY in debug mode!!!
resp = create_error_response(
[err,], "Unexpected Server error", web.HTTPInternalServerError
)

logger.exception(
'Unexpected server error "%s" from access: %s "%s %s". Responding with status %s',
type(err),
request.remote,
request.method,
request.path,
resp.status,
)
raise resp

def error_middleware_factory(
api_version: str = DEFAULT_API_VERSION, log_exceptions=True
):
_is_prod: bool = is_production_environ()

def _process_and_raise_unexpected_error(request: web.BaseRequest, err: Exception):
resp = create_error_response(
[err,],
"Unexpected Server error",
web.HTTPInternalServerError,
skip_internal_error_details=_is_prod,
)

if log_exceptions:
logger.error(
'Unexpected server error "%s" from access: %s "%s %s". Responding with status %s',
type(err),
request.remote,
request.method,
request.path,
resp.status,
exc_info=err,
stack_info=True,
)
raise resp

def error_middleware_factory(api_version: str = DEFAULT_API_VERSION):
@web.middleware
async def _middleware(request: web.Request, handler):
async def _middleware_handler(request: web.Request, handler):
"""
Ensure all error raised are properly enveloped and json responses
"""
Expand Down Expand Up @@ -98,12 +107,15 @@ async def _middleware(request: web.Request, handler):
except Exception as err: # pylint: disable=broad-except
_process_and_raise_unexpected_error(request, err)

return _middleware
# adds identifier (mostly for debugging)
_middleware_handler.__middleware_name__ = f"{__name__}.error_{api_version}"

return _middleware_handler


def validate_middleware_factory(api_version: str = DEFAULT_API_VERSION):
@web.middleware
async def _middleware(request: web.Request, handler):
async def _middleware_handler(request: web.Request, handler):
"""
Validates requests against openapi specs and extracts body, params, etc ...
Validate response against openapi specs
Expand Down Expand Up @@ -141,12 +153,17 @@ async def _middleware(request: web.Request, handler):

return response

return _middleware
# adds identifier (mostly for debugging)
_middleware_handler.__middleware_name__ = f"{__name__}.validate_{api_version}"

return _middleware_handler


def envelope_middleware_factory(api_version: str = DEFAULT_API_VERSION):
_is_prod: bool = is_production_environ()

@web.middleware
async def _middleware(request: web.Request, handler):
async def _middleware_handler(request: web.Request, handler):
"""
Ensures all responses are enveloped as {'data': .. , 'error', ...} in json
"""
Expand All @@ -156,13 +173,18 @@ async def _middleware(request: web.Request, handler):
resp = await handler(request)

if not isinstance(resp, web.Response):
response = create_data_response(data=resp)
response = create_data_response(
data=resp, skip_internal_error_details=_is_prod,
)
else:
# Enforced by user. Should check it is json?
response = resp
return response

return _middleware
# adds identifier (mostly for debugging)
_middleware_handler.__middleware_name__ = f"{__name__}.envelope_{api_version}"

return _middleware_handler


def append_rest_middlewares(
Expand Down
Loading