Skip to content

Commit 15ebbad

Browse files
author
Pedro Crespo
committed
Logged unhealthy decision
1 parent 2675e4b commit 15ebbad

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,34 @@
11

2+
import logging
23
import os
3-
from typing import List
4+
from typing import List, Optional
45

56
import attr
67
from aiohttp import web
7-
import logging
88

99
from servicelib import monitor_slow_callbacks
1010

1111
log = logging.getLogger(__name__)
1212

1313
INCIDENTS_REGISTRY_KEY = f"{__name__}.registry"
1414

15-
AIODEBUG_SLOW_DURATION_SECS = float(os.environ.get("AIODEBUG_SLOW_DURATION_SECS", 0.1))
15+
AIODEBUG_SLOW_DURATION_SECS = float(os.environ.get("AIODEBUG_SLOW_DURATION_SECS", 0.2))
1616
MAX_DELAY_SECS_ALLOWED = 300 * AIODEBUG_SLOW_DURATION_SECS
1717

1818
@attr.s(auto_attribs=True)
1919
class IncidentsRegistry:
20+
# FIXME: should this list be limited in size, keeping only the worst cases?
2021
slow_callbaks: List[monitor_slow_callbacks.Incident]
2122

22-
@property
23-
def max_delay(self) -> float:
23+
def eval_max_delay(self) -> float:
2424
return max( incident.delay_secs for incident in self.slow_callbaks )
2525

2626

2727

28-
def setup_diagnostics(app: web.Application):
28+
def setup_diagnostics(app: web.Application, *, max_delay_allowed: Optional[float]=None):
2929
# NOTE: Every task blocking > AIODEBUG_SLOW_DURATION_SECS secs is considered slow and is logged as a warning
30-
incidents = monitor_slow_callbacks.enable(MAX_DELAY_SECS_ALLOWED)
30+
if max_delay_allowed is None:
31+
max_delay_allowed = MAX_DELAY_SECS_ALLOWED
32+
incidents = monitor_slow_callbacks.enable(max_delay_allowed)
3133

3234
app[INCIDENTS_REGISTRY_KEY] = IncidentsRegistry(incidents)

services/web/server/src/simcore_service_webserver/rest_handlers.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
"""
55
import asyncio
66
from typing import Optional
7+
import logging
78

89
from aiohttp import web
910

@@ -20,12 +21,21 @@
2021
from .utils import get_task_info, get_tracemalloc_info
2122

2223

24+
log = logging.getLogger(__name__)
25+
26+
2327
async def check_health(request: web.Request):
2428

2529
# diagnostics of incidents
2630
incidents: Optional[IncidentsRegistry] = request.app.get(INCIDENTS_REGISTRY_KEY)
2731
if incidents:
28-
if incidents.max_delay > MAX_DELAY_SECS_ALLOWED:
32+
max_delay: float = incidents.eval_max_delay()
33+
if max_delay > MAX_DELAY_SECS_ALLOWED:
34+
log.error(
35+
"Unhealthy service: %s secs delay [%s secs allowed]",
36+
max_delay,
37+
MAX_DELAY_SECS_ALLOWED,
38+
)
2939
raise web.HTTPServiceUnavailable()
3040

3141
data = {

0 commit comments

Comments
 (0)