Skip to content

Commit fcc46ca

Browse files
committed
Global exceptions were silencing CancellationError
Cleanup
1 parent 917cef2 commit fcc46ca

File tree

2 files changed

+40
-38
lines changed

2 files changed

+40
-38
lines changed

services/web/server/src/simcore_service_webserver/resource_manager/config.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,15 @@
77

88
import trafaret as T
99
from aiohttp.web import Application
10-
from pydantic import BaseSettings, Field, PositiveInt
11-
1210
from models_library.settings.redis import RedisConfig
11+
from pydantic import BaseSettings, Field, PositiveInt
1312
from servicelib.application_keys import APP_CONFIG_KEY
1413

1514
CONFIG_SECTION_NAME = "resource_manager"
1615
APP_CLIENT_REDIS_CLIENT_KEY = __name__ + ".resource_manager.redis_client"
1716
APP_CLIENT_REDIS_LOCK_KEY = __name__ + ".resource_manager.redis_lock"
1817
APP_CLIENT_SOCKET_REGISTRY_KEY = __name__ + ".resource_manager.registry"
1918
APP_RESOURCE_MANAGER_TASKS_KEY = __name__ + ".resource_manager.tasks.key"
20-
APP_GARBAGE_COLLECTOR_KEY = __name__ + ".resource_manager.garbage_collector_key"
2119

2220

2321
# lock names and format strings

services/web/server/src/simcore_service_webserver/resource_manager/garbage_collector.py

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
import logging
33
from contextlib import suppress
44
from itertools import chain
5-
from typing import Dict, List, Tuple
5+
from typing import Dict, List, Optional, Tuple
66

7+
import psycopg2
78
from aiohttp import web
8-
from aiopg.sa.result import RowProxy
99
from aioredlock import Aioredlock
10-
1110
from servicelib.observer import emit
1211
from servicelib.utils import logged_gather
1312
from simcore_service_webserver import users_exceptions
@@ -43,7 +42,6 @@
4342

4443
from .config import (
4544
APP_CLIENT_REDIS_LOCK_KEY,
46-
APP_GARBAGE_COLLECTOR_KEY,
4745
GUEST_USER_RC_LOCK_FORMAT,
4846
get_garbage_collector_interval,
4947
)
@@ -52,23 +50,25 @@
5250
logger = logging.getLogger(__name__)
5351

5452

55-
async def setup_garbage_collector_task(app: web.Application):
56-
loop = asyncio.get_event_loop()
57-
app[APP_GARBAGE_COLLECTOR_KEY] = loop.create_task(garbage_collector_task(app))
58-
yield
59-
task = app[APP_GARBAGE_COLLECTOR_KEY]
60-
task.cancel()
61-
await task
53+
def setup_garbage_collector(app: web.Application):
54+
async def _setup_background_task(app: web.Application):
55+
# on_startup
56+
loop = asyncio.get_event_loop()
57+
cgp_task = loop.create_task(collect_garbage_periodically(app))
6258

59+
yield
6360

64-
def setup_garbage_collector(app: web.Application):
65-
app.cleanup_ctx.append(setup_garbage_collector_task)
61+
# on_cleanup
62+
with suppress(asyncio.CancelledError):
63+
cgp_task.cancel()
64+
await cgp_task
6665

66+
app.cleanup_ctx.append(_setup_background_task)
6767

68-
async def garbage_collector_task(app: web.Application):
69-
keep_alive = True
7068

71-
while keep_alive:
69+
async def collect_garbage_periodically(app: web.Application):
70+
71+
while True:
7272
logger.info("Starting garbage collector...")
7373
try:
7474
interval = get_garbage_collector_interval(app)
@@ -77,8 +77,9 @@ async def garbage_collector_task(app: web.Application):
7777
await asyncio.sleep(interval)
7878

7979
except asyncio.CancelledError:
80-
keep_alive = False
8180
logger.info("Garbage collection task was cancelled, it will not restart!")
81+
# do not catch Cancellation errors
82+
raise
8283

8384
except Exception: # pylint: disable=broad-except
8485
logger.warning(
@@ -254,12 +255,10 @@ async def remove_disconnected_user_resources(
254255
resource_name,
255256
keys_to_update,
256257
)
257-
with suppress(asyncio.CancelledError):
258-
on_released_tasks = [
259-
registry.remove_resource(key, resource_name)
260-
for key in keys_to_update
261-
]
262-
await logged_gather(*on_released_tasks, reraise=False)
258+
on_released_tasks = [
259+
registry.remove_resource(key, resource_name) for key in keys_to_update
260+
]
261+
await logged_gather(*on_released_tasks, reraise=False)
263262

264263
# NOTE:
265264
# - if releasing a resource (1) fails, annotations in registry allows GC to try in next round
@@ -357,8 +356,8 @@ async def remove_orphaned_services(
357356
logger.info("Will remove service %s", interactive_service["service_host"])
358357
try:
359358
await stop_service(app, node_id)
360-
except (ServiceNotFoundError, DirectorException) as e:
361-
logger.warning("Error while stopping service: %s", e)
359+
except (ServiceNotFoundError, DirectorException) as err:
360+
logger.warning("Error while stopping service: %s", err)
362361

363362
logger.info("Finished orphaned services removal")
364363

@@ -375,11 +374,12 @@ async def remove_guest_user_with_all_its_resources(
375374
try:
376375
await remove_all_projects_for_user(app=app, user_id=user_id)
377376
await remove_user(app=app, user_id=user_id)
378-
except Exception as e: # pylint: disable=broad-except
379-
logger.warning("%s", e)
377+
378+
except psycopg2.DatabaseError:
380379
logger.warning(
381380
"Could not remove GUEST with id=%s. Check the logs above for details",
382381
user_id,
382+
exc_info=True,
383383
)
384384

385385

@@ -405,7 +405,7 @@ async def remove_all_projects_for_user(app: web.Application, user_id: int) -> No
405405
user_id,
406406
)
407407
return
408-
user_primary_gid: str = str(project_owner["primary_gid"])
408+
user_primary_gid = int(project_owner["primary_gid"])
409409

410410
# fetch all projects for the user
411411
user_project_uuids = await app[
@@ -472,8 +472,8 @@ async def get_new_project_owner_gid(
472472
project_uuid: str,
473473
user_id: int,
474474
user_primary_gid: int,
475-
project: RowProxy,
476-
) -> str:
475+
project: Dict,
476+
) -> Optional[int]:
477477
"""Goes through the access rights and tries to find a new suitable owner.
478478
The first viable user is selected as a new owner.
479479
In order to become a new owner the user must have write access right.
@@ -533,7 +533,7 @@ async def get_new_project_owner_gid(
533533

534534
async def fetch_new_project_owner_from_groups(
535535
app: web.Application, standard_groups: Dict, user_id: int
536-
) -> int:
536+
) -> Optional[int]:
537537
"""Iterate over all the users in a group and if the users exists in the db
538538
return its gid"""
539539

@@ -554,6 +554,7 @@ async def fetch_new_project_owner_from_groups(
554554
"Could not find new owner '%s' will try a new one",
555555
possible_user_id,
556556
)
557+
557558
return None
558559

559560

@@ -562,17 +563,19 @@ async def replace_current_owner(
562563
project_uuid: str,
563564
user_primary_gid: int,
564565
new_project_owner_gid: str,
565-
project: RowProxy,
566+
project: Dict,
566567
) -> None:
567568
try:
568569
new_project_owner_id = await get_user_id_from_gid(
569570
app=app, primary_gid=int(new_project_owner_gid)
570571
)
571-
except Exception: # pylint: disable=broad-except
572+
573+
except psycopg2.DatabaseError:
572574
logger.exception(
573575
"Could not recover new user id from gid %s", new_project_owner_gid
574576
)
575577
return
578+
576579
# the result might me none
577580
if new_project_owner_id is None:
578581
logger.warning(
@@ -588,13 +591,14 @@ async def replace_current_owner(
588591
str(new_project_owner_gid)
589592
] = ProjectAccessRights.OWNER.value
590593
logger.error("Syncing back project %s", project)
594+
591595
# syncing back project data
592596
try:
593597
await app[APP_PROJECT_DBAPI].update_project_without_enforcing_checks(
594598
project_data=project,
595599
project_uuid=project_uuid,
596600
)
597-
except Exception: # pylint: disable=broad-except
601+
except psycopg2.DatabaseError:
598602
logger.exception(
599603
"Could not remove old owner and replaced it with user %s",
600604
new_project_owner_id,
@@ -605,7 +609,7 @@ async def remove_user(app: web.Application, user_id: int) -> None:
605609
"""Tries to remove a user, if the users still exists a warning message will be displayed"""
606610
try:
607611
await delete_user(app, user_id)
608-
except Exception: # pylint: disable=broad-except
612+
except psycopg2.DatabaseError:
609613
logger.warning(
610614
"User '%s' still has some projects, could not be deleted", user_id
611615
)

0 commit comments

Comments
 (0)