File tree 2 files changed +20
-8
lines changed
services/web/server/src/simcore_service_webserver
2 files changed +20
-8
lines changed Original file line number Diff line number Diff line change 1
1
2
+ import logging
2
3
import os
3
- from typing import List
4
+ from typing import List , Optional
4
5
5
6
import attr
6
7
from aiohttp import web
7
- import logging
8
8
9
9
from servicelib import monitor_slow_callbacks
10
10
11
11
# Module-level logger named after this module
log = logging .getLogger (__name__ )
12
12
13
13
# Key under which setup_diagnostics stores the IncidentsRegistry in the aiohttp application
INCIDENTS_REGISTRY_KEY = f"{ __name__ } .registry"
14
14
15
# Duration in seconds above which a blocking task is considered slow; read from the
# AIODEBUG_SLOW_DURATION_SECS environment variable (this change raises the fallback from 0.1 to 0.2)
- AIODEBUG_SLOW_DURATION_SECS = float (os .environ .get ("AIODEBUG_SLOW_DURATION_SECS" , 0.1 ))
15
+ AIODEBUG_SLOW_DURATION_SECS = float (os .environ .get ("AIODEBUG_SLOW_DURATION_SECS" , 0.2 ))
16
16
# Longest tolerated delay in seconds: 300 times the slow-callback duration above
MAX_DELAY_SECS_ALLOWED = 300 * AIODEBUG_SLOW_DURATION_SECS
17
17
18
18
@attr.s(auto_attribs=True)
class IncidentsRegistry:
    """Holds the slow-callback incidents returned by ``monitor_slow_callbacks.enable``."""

    # FIXME: the list is unbounded; consider capping it so only the worst incidents are kept
    slow_callbaks: List[monitor_slow_callbacks.Incident]

    def eval_max_delay(self) -> float:
        """Return the largest ``delay_secs`` among the recorded incidents.

        Returns ``0.0`` when no incident has been recorded. Without the
        ``default``, ``max()`` raises ``ValueError`` on an empty sequence,
        which would surface as an error in callers that evaluate the delay
        before any slow callback was registered.
        """
        return max(
            (incident.delay_secs for incident in self.slow_callbaks),
            default=0.0,
        )
25
25
26
26
27
27
28
def setup_diagnostics(
    app: web.Application, *, max_delay_allowed: Optional[float] = None
):
    """Enable slow-callback monitoring and register its incidents in ``app``.

    :param app: aiohttp application that receives the incidents registry
        under ``INCIDENTS_REGISTRY_KEY``.
    :param max_delay_allowed: value forwarded to
        ``monitor_slow_callbacks.enable``; ``MAX_DELAY_SECS_ALLOWED`` is used
        when it is ``None``.
    """
    # NOTE: any task blocking longer than AIODEBUG_SLOW_DURATION_SECS secs is
    # considered slow and logged as warning
    threshold = (
        MAX_DELAY_SECS_ALLOWED if max_delay_allowed is None else max_delay_allowed
    )
    slow_incidents = monitor_slow_callbacks.enable(threshold)

    app[INCIDENTS_REGISTRY_KEY] = IncidentsRegistry(slow_incidents)
Original file line number Diff line number Diff line change 4
4
"""
5
5
import asyncio
6
6
from typing import Optional
7
+ import logging
7
8
8
9
from aiohttp import web
9
10
20
21
from .utils import get_task_info , get_tracemalloc_info
21
22
22
23
24
+ log = logging .getLogger (__name__ )
25
+
26
+
23
27
async def check_health (request : web .Request ):
24
28
25
29
# diagnostics of incidents
26
30
incidents : Optional [IncidentsRegistry ] = request .app .get (INCIDENTS_REGISTRY_KEY )
27
31
if incidents :
28
- if incidents .max_delay > MAX_DELAY_SECS_ALLOWED :
32
+ max_delay : float = incidents .eval_max_delay ()
33
+ if max_delay > MAX_DELAY_SECS_ALLOWED :
34
+ log .error (
35
+ "Unhealthy service: %s secs delay [%s secs allowed]" ,
36
+ max_delay ,
37
+ MAX_DELAY_SECS_ALLOWED ,
38
+ )
29
39
raise web .HTTPServiceUnavailable ()
30
40
31
41
data = {
You can’t perform that action at this time.
0 commit comments