Skip to content

Commit 1b7a47e

Browse files
authored
Is1570/study fails 500 (#1572)
Fixes #1570 * increases verbose in gc errors * Turned gather errors into warnings * Fixing issue when valid token with invalid user * Authz cache cleared when any user is deleted * Tests fix emulating a valid cookie invalid user * Enhances error handling
1 parent bdfcd08 commit 1b7a47e

File tree

9 files changed

+198
-71
lines changed

9 files changed

+198
-71
lines changed

packages/service-library/src/servicelib/utils.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,25 @@ def log_exception_callback(fut: asyncio.Future):
6060

6161

6262
# // tasks
63-
async def logged_gather(*tasks, reraise: bool = True) -> List[Any]:
64-
# all coroutine called in // and we take care of returning the exceptions
63+
async def logged_gather(
64+
*tasks, reraise: bool = True, log: logging.Logger = logger
65+
) -> List[Any]:
66+
"""
67+
*all* coroutine passed are executed concurrently and once they are all
68+
completed, the first error (if any) is reraised or all returned
69+
70+
log: passing the logger gives a chance to identify the origin of the gather call
71+
"""
6572
results = await asyncio.gather(*tasks, return_exceptions=True)
6673
for value in results:
74+
# WARN: note that ONLY THE FIRST exception is raised
6775
if isinstance(value, Exception):
6876
if reraise:
6977
raise value
70-
logger.error(
71-
"Exception occured while running %s: %s",
78+
# Exception is returned, therefore it is not logged as error but as warning
79+
# It was user's decision not to reraise them
80+
log.warning(
81+
"Exception occured while running task %s in gather: %s",
7282
str(tasks[results.index(value)]),
7383
str(value),
7484
)

services/web/server/src/simcore_service_webserver/resource_manager/garbage_collector.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ async def garbage_collector_task(app: web.Application):
177177
keep_alive = False
178178
logger.info("Garbage collection task was cancelled, it will not restart!")
179179
except Exception: # pylint: disable=broad-except
180-
logger.warning("There was an error during garbage collection, restarting...")
180+
logger.warning("There was an error during garbage collection, restarting...", exc_info=True)
181181
# will wait 5 seconds before restarting to avoid restart loops
182182
await asyncio.sleep(5)
183183

services/web/server/src/simcore_service_webserver/security_api.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from aiopg.sa import Engine
1919

2020
from .db_models import UserStatus, users
21+
from .security_authorization import AuthorizationPolicy, RoleBasedAccessModel
2122
from .security_roles import UserRole
2223

2324
log = logging.getLogger(__file__)
@@ -35,19 +36,24 @@ async def check_credentials(engine: Engine, email: str, password: str) -> bool:
3536
return False
3637

3738

38-
def encrypt_password(password):
39+
def encrypt_password(password: str) -> str:
3940
return passlib.hash.sha256_crypt.encrypt(password, rounds=1000)
4041

4142

42-
def check_password(password, password_hash):
43+
def check_password(password: str, password_hash: str) -> bool:
4344
return passlib.hash.sha256_crypt.verify(password, password_hash)
4445

4546

46-
def get_access_model(app: web.Application):
47-
autz_policy = app[AUTZ_KEY]
47+
def get_access_model(app: web.Application) -> RoleBasedAccessModel:
48+
autz_policy: AuthorizationPolicy = app[AUTZ_KEY]
4849
return autz_policy.access_model
4950

5051

52+
def clean_auth_policy_cache(app: web.Application) -> None:
53+
autz_policy: AuthorizationPolicy = app[AUTZ_KEY]
54+
autz_policy.timed_cache.clear()
55+
56+
5157
__all__ = (
5258
"encrypt_password",
5359
"check_credentials",

services/web/server/src/simcore_service_webserver/studies_access.py

+31-15
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .login.decorators import login_required
2424
from .security_api import is_anonymous, remember
2525
from .statics import INDEX_RESOURCE_NAME
26+
from .utils import compose_error_msg
2627

2728
log = logging.getLogger(__name__)
2829

@@ -156,7 +157,9 @@ async def access_study(request: web.Request) -> web.Response:
156157
157158
- public studies are templates that are marked as published in the database
158159
- if user is not registered, it creates a temporary guest account with limited resources and expiration
160+
- this handler is NOT part of the API and therefore does NOT respond with json
159161
"""
162+
# TODO: implement nice error-page.html
160163
project_id = request.match_info["id"]
161164

162165
template_project = await get_public_project(request.app, project_id)
@@ -166,25 +169,38 @@ async def access_study(request: web.Request) -> web.Response:
166169
Please contact the data curators for more information."
167170
)
168171

172+
# Get or create a valid user
169173
user = None
170174
is_anonymous_user = await is_anonymous(request)
171-
if is_anonymous_user:
172-
log.debug("Creating temporary user ...")
173-
user = await create_temporary_user(request)
174-
else:
175+
if not is_anonymous_user:
176+
# NOTE: covers valid cookie with unauthorized user (e.g. expired guest/banned)
177+
# TODO: test if temp user overrides old cookie properly
175178
user = await get_authorized_user(request)
176179

177180
if not user:
178-
raise RuntimeError("Unable to start user session")
181+
log.debug("Creating temporary user ...")
182+
user = await create_temporary_user(request)
183+
is_anonymous_user = True
179184

180-
log.debug(
181-
"Granted access to study '%s' for user %s. Copying study over ...",
182-
template_project.get("name"),
183-
user.get("email"),
184-
)
185-
copied_project_id = await copy_study_to_account(request, template_project, user)
185+
try:
186+
log.debug(
187+
"Granted access to study '%s' for user %s. Copying study over ...",
188+
template_project.get("name"),
189+
user.get("email"),
190+
)
191+
copied_project_id = await copy_study_to_account(request, template_project, user)
192+
193+
log.debug("Study %s copied", copied_project_id)
186194

187-
log.debug("Study %s copied", copied_project_id)
195+
except Exception: # pylint: disable=broad-except
196+
log.exception(
197+
"Failed while copying project '%s' to '%s'",
198+
template_project.get("name"),
199+
user.get("email"),
200+
)
201+
raise web.HTTPInternalServerError(
202+
reason=compose_error_msg("Unable to copy project.")
203+
)
188204

189205
try:
190206
redirect_url = (
@@ -193,11 +209,11 @@ async def access_study(request: web.Request) -> web.Response:
193209
.with_fragment("/study/{}".format(copied_project_id))
194210
)
195211
except KeyError:
196-
log.error(
212+
log.exception(
197213
"Cannot redirect to website because route was not registered. Probably qx output was not ready and it was disabled (see statics.py)"
198214
)
199-
raise RuntimeError(
200-
"Unable to serve front-end. Study has been anyway copied over to user."
215+
raise web.HTTPInternalServerError(
216+
reason=compose_error_msg("Unable to serve front-end.")
201217
)
202218

203219
response = web.HTTPFound(location=redirect_url)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{% block title %}
2+
Site Error
3+
{% endblock %}
4+
5+
{% block content %}
6+
<h1>Opps, this is a bit embarrasing</h1>
7+
<p>
8+
<b>
9+
<big>
10+
{{ error_text }}
11+
</big>
12+
</b>
13+
</p>
14+
{% endblock %}

services/web/server/src/simcore_service_webserver/users_api.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212

1313
from .db_models import GroupType, groups, tokens, user_to_groups, users
1414
from .groups_api import convert_groups_db_to_schema
15-
from .users_utils import convert_user_db_to_schema
15+
from .security_api import clean_auth_policy_cache
1616
from .users_exceptions import UserNotFoundError
17+
from .users_utils import convert_user_db_to_schema
1718

1819
logger = logging.getLogger(__name__)
1920

@@ -23,6 +24,7 @@ async def get_user_profile(app: web.Application, user_id: int) -> Dict[str, Any]
2324
user_profile: Dict[str, Any] = {}
2425
user_primary_group = all_group = {}
2526
user_standard_groups = []
27+
2628
async with engine.acquire() as conn:
2729
async for row in conn.execute(
2830
sa.select(
@@ -105,6 +107,11 @@ async def is_user_guest(app: web.Application, user_id: int) -> bool:
105107

106108
async def delete_user(app: web.Application, user_id: int) -> None:
107109
"""Deletes a user from the database if the user exists"""
110+
# FIXME: user cannot be deleted without deleting first all ist project
111+
# otherwise this function will raise asyncpg.exceptions.ForeignKeyViolationError
112+
# Consider "marking" users as deleted and havning a background job that
113+
# cleans it up
114+
108115
db = get_storage(app)
109116
user = await db.get_user({"id": user_id})
110117
if not user:
@@ -115,6 +122,10 @@ async def delete_user(app: web.Application, user_id: int) -> None:
115122

116123
await db.delete_user(user)
117124

125+
# This user might be cached in the auth. If so, any request
126+
# with this user-id will get thru producing unexpected side-effects
127+
clean_auth_policy_cache(app)
128+
118129

119130
# TOKEN -------------------------------------------
120131
async def create_token(

services/web/server/src/simcore_service_webserver/users_handlers.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@
88
from . import users_api
99
from .login.decorators import RQT_USERID_KEY, login_required
1010
from .security_decorators import permission_required
11-
from .users_exceptions import (
12-
TokenNotFoundError,
13-
UserNotFoundError,
14-
)
11+
from .users_exceptions import TokenNotFoundError, UserNotFoundError
1512

1613
logger = logging.getLogger(__name__)
1714

@@ -24,7 +21,8 @@ async def get_my_profile(request: web.Request):
2421
try:
2522
return await users_api.get_user_profile(request.app, uid)
2623
except UserNotFoundError:
27-
raise web.HTTPServerError(reason="could not find profile!")
24+
# NOTE: invalid user_id could happen due to timed-cache in AuthorizationPolicy
25+
raise web.HTTPNotFound(reason="Could not find profile!")
2826

2927

3028
@login_required

services/web/server/src/simcore_service_webserver/utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,8 @@ def get_tracemalloc_info(top=10) -> List[str]:
196196
)
197197

198198
return top_trace
199+
200+
201+
def compose_error_msg(msg: str) -> str:
202+
msg = msg.strip()
203+
return f"{msg}. Please send this message to [email protected] [{now_str()}]"

0 commit comments

Comments
 (0)