Skip to content

Commit abafb5f

Browse files
committed
Global exceptions were silencing CancellationError
Cleanup
1 parent 2fa4dc4 commit abafb5f

File tree

2 files changed

+40
-37
lines changed

2 files changed

+40
-37
lines changed

services/web/server/src/simcore_service_webserver/resource_manager/config.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,15 @@
77

88
import trafaret as T
99
from aiohttp.web import Application
10-
from pydantic import BaseSettings, Field, PositiveInt
11-
1210
from models_library.settings.redis import RedisConfig
11+
from pydantic import BaseSettings, Field, PositiveInt
1312
from servicelib.application_keys import APP_CONFIG_KEY
1413

1514
CONFIG_SECTION_NAME = "resource_manager"
1615
APP_CLIENT_REDIS_CLIENT_KEY = __name__ + ".resource_manager.redis_client"
1716
APP_CLIENT_REDIS_LOCK_KEY = __name__ + ".resource_manager.redis_lock"
1817
APP_CLIENT_SOCKET_REGISTRY_KEY = __name__ + ".resource_manager.registry"
1918
APP_RESOURCE_MANAGER_TASKS_KEY = __name__ + ".resource_manager.tasks.key"
20-
APP_GARBAGE_COLLECTOR_KEY = __name__ + ".resource_manager.garbage_collector_key"
2119

2220

2321
# lock names and format strings

services/web/server/src/simcore_service_webserver/resource_manager/garbage_collector.py

+39-34
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
import logging
33
from contextlib import suppress
44
from itertools import chain
5-
from typing import Dict, List, Tuple
5+
from typing import Dict, List, Optional, Tuple
66

7+
import psycopg2
78
from aiohttp import web
8-
from aiopg.sa.result import RowProxy
99
from aioredlock import Aioredlock
1010
from servicelib.observer import emit
1111
from servicelib.utils import logged_gather
@@ -42,7 +42,6 @@
4242

4343
from .config import (
4444
APP_CLIENT_REDIS_LOCK_KEY,
45-
APP_GARBAGE_COLLECTOR_KEY,
4645
GUEST_USER_RC_LOCK_FORMAT,
4746
get_garbage_collector_interval,
4847
)
@@ -51,23 +50,25 @@
5150
logger = logging.getLogger(__name__)
5251

5352

54-
async def setup_garbage_collector_task(app: web.Application):
55-
loop = asyncio.get_event_loop()
56-
app[APP_GARBAGE_COLLECTOR_KEY] = loop.create_task(garbage_collector_task(app))
57-
yield
58-
task = app[APP_GARBAGE_COLLECTOR_KEY]
59-
task.cancel()
60-
await task
53+
def setup_garbage_collector(app: web.Application):
54+
async def _setup_background_task(app: web.Application):
55+
# on_startup
56+
loop = asyncio.get_event_loop()
57+
cgp_task = loop.create_task(collect_garbage_periodically(app))
6158

59+
yield
6260

63-
def setup_garbage_collector(app: web.Application):
64-
app.cleanup_ctx.append(setup_garbage_collector_task)
61+
# on_cleanup
62+
with suppress(asyncio.CancelledError):
63+
cgp_task.cancel()
64+
await cgp_task
65+
66+
app.cleanup_ctx.append(_setup_background_task)
6567

6668

67-
async def garbage_collector_task(app: web.Application):
68-
keep_alive = True
69+
async def collect_garbage_periodically(app: web.Application):
6970

70-
while keep_alive:
71+
while True:
7172
logger.info("Starting garbage collector...")
7273
try:
7374
interval = get_garbage_collector_interval(app)
@@ -76,8 +77,9 @@ async def garbage_collector_task(app: web.Application):
7677
await asyncio.sleep(interval)
7778

7879
except asyncio.CancelledError:
79-
keep_alive = False
8080
logger.info("Garbage collection task was cancelled, it will not restart!")
81+
# do not catch Cancellation errors
82+
raise
8183

8284
except Exception: # pylint: disable=broad-except
8385
logger.warning(
@@ -253,12 +255,10 @@ async def remove_disconnected_user_resources(
253255
resource_name,
254256
keys_to_update,
255257
)
256-
with suppress(asyncio.CancelledError):
257-
on_released_tasks = [
258-
registry.remove_resource(key, resource_name)
259-
for key in keys_to_update
260-
]
261-
await logged_gather(*on_released_tasks, reraise=False)
258+
on_released_tasks = [
259+
registry.remove_resource(key, resource_name) for key in keys_to_update
260+
]
261+
await logged_gather(*on_released_tasks, reraise=False)
262262

263263
# NOTE:
264264
# - if releasing a resource (1) fails, annotations in registry allows GC to try in next round
@@ -373,8 +373,8 @@ async def remove_orphaned_services(
373373
logger.info("Will remove service %s", service_host)
374374
try:
375375
await stop_service(app, node_id)
376-
except (ServiceNotFoundError, DirectorException) as e:
377-
logger.warning("Error while stopping service: %s", e)
376+
except (ServiceNotFoundError, DirectorException) as err:
377+
logger.warning("Error while stopping service: %s", err)
378378

379379
logger.info("Finished orphaned services removal")
380380

@@ -391,11 +391,12 @@ async def remove_guest_user_with_all_its_resources(
391391
try:
392392
await remove_all_projects_for_user(app=app, user_id=user_id)
393393
await remove_user(app=app, user_id=user_id)
394-
except Exception as e: # pylint: disable=broad-except
395-
logger.warning("%s", e)
394+
395+
except psycopg2.DatabaseError:
396396
logger.warning(
397397
"Could not remove GUEST with id=%s. Check the logs above for details",
398398
user_id,
399+
exc_info=True,
399400
)
400401

401402

@@ -421,7 +422,7 @@ async def remove_all_projects_for_user(app: web.Application, user_id: int) -> No
421422
user_id,
422423
)
423424
return
424-
user_primary_gid: str = str(project_owner["primary_gid"])
425+
user_primary_gid = int(project_owner["primary_gid"])
425426

426427
# fetch all projects for the user
427428
user_project_uuids = await app[
@@ -488,8 +489,8 @@ async def get_new_project_owner_gid(
488489
project_uuid: str,
489490
user_id: int,
490491
user_primary_gid: int,
491-
project: RowProxy,
492-
) -> str:
492+
project: Dict,
493+
) -> Optional[int]:
493494
"""Goes through the access rights and tries to find a new suitable owner.
494495
The first viable user is selected as a new owner.
495496
In order to become a new owner the user must have write access right.
@@ -549,7 +550,7 @@ async def get_new_project_owner_gid(
549550

550551
async def fetch_new_project_owner_from_groups(
551552
app: web.Application, standard_groups: Dict, user_id: int
552-
) -> int:
553+
) -> Optional[int]:
553554
"""Iterate over all the users in a group and if the users exists in the db
554555
return its gid"""
555556

@@ -570,6 +571,7 @@ async def fetch_new_project_owner_from_groups(
570571
"Could not find new owner '%s' will try a new one",
571572
possible_user_id,
572573
)
574+
573575
return None
574576

575577

@@ -578,17 +580,19 @@ async def replace_current_owner(
578580
project_uuid: str,
579581
user_primary_gid: int,
580582
new_project_owner_gid: str,
581-
project: RowProxy,
583+
project: Dict,
582584
) -> None:
583585
try:
584586
new_project_owner_id = await get_user_id_from_gid(
585587
app=app, primary_gid=int(new_project_owner_gid)
586588
)
587-
except Exception: # pylint: disable=broad-except
589+
590+
except psycopg2.DatabaseError:
588591
logger.exception(
589592
"Could not recover new user id from gid %s", new_project_owner_gid
590593
)
591594
return
595+
592596
# the result might me none
593597
if new_project_owner_id is None:
594598
logger.warning(
@@ -604,13 +608,14 @@ async def replace_current_owner(
604608
str(new_project_owner_gid)
605609
] = ProjectAccessRights.OWNER.value
606610
logger.error("Syncing back project %s", project)
611+
607612
# syncing back project data
608613
try:
609614
await app[APP_PROJECT_DBAPI].update_project_without_enforcing_checks(
610615
project_data=project,
611616
project_uuid=project_uuid,
612617
)
613-
except Exception: # pylint: disable=broad-except
618+
except psycopg2.DatabaseError:
614619
logger.exception(
615620
"Could not remove old owner and replaced it with user %s",
616621
new_project_owner_id,
@@ -621,7 +626,7 @@ async def remove_user(app: web.Application, user_id: int) -> None:
621626
"""Tries to remove a user, if the users still exists a warning message will be displayed"""
622627
try:
623628
await delete_user(app, user_id)
624-
except Exception: # pylint: disable=broad-except
629+
except psycopg2.DatabaseError:
625630
logger.warning(
626631
"User '%s' still has some projects, could not be deleted", user_id
627632
)

0 commit comments

Comments
 (0)