Skip to content

enhancements on scicrunch classifiers (backend) #2065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Dec 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions api/specs/webserver/openapi-groups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,10 @@ paths:
responses:
"200":
description: Got information of a valid RRID
"404":
description: Cannot find a valid research resource for the provided RRID
"400":
description: Invalid RRID
"503":
description: Underline validation service is not reachable
description: scicrunch.org service is not reachable
default:
$ref: "#/components/responses/DefaultErrorResponse"
post:
Expand All @@ -264,8 +264,10 @@ paths:
responses:
"200":
description: Got information of a valid RRID
"422":
"400":
description: Invalid RRID
"503":
description: scicrunch.org service is not reachable
default:
$ref: "#/components/responses/DefaultErrorResponse"

Expand All @@ -284,6 +286,8 @@ paths:
responses:
"200":
description: Got information of a valid RRID
"503":
description: scicrunch.org service is not reachable
default:
$ref: "#/components/responses/DefaultErrorResponse"
components:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4661,10 +4661,10 @@ paths:
responses:
'200':
description: Got information of a valid RRID
'404':
description: Cannot find a valid research resource for the provided RRID
'400':
description: Invalid RRID
'503':
description: Underline validation service is not reachable
description: scicrunch.org service is not reachable
default:
description: Default http error response body
content:
Expand Down Expand Up @@ -4756,8 +4756,10 @@ paths:
responses:
'200':
description: Got information of a valid RRID
'422':
'400':
description: Invalid RRID
'503':
description: scicrunch.org service is not reachable
default:
description: Default http error response body
content:
Expand Down Expand Up @@ -4856,6 +4858,8 @@ paths:
responses:
'200':
description: Got information of a valid RRID
'503':
description: scicrunch.org service is not reachable
default:
description: Default http error response body
content:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from .constants import APP_DB_ENGINE_KEY
from .scicrunch.scicrunch_db import ResearchResourceRepository
from .scicrunch.service_client import SciCrunchAPI
from .scicrunch.service_client import SciCrunch

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -114,7 +114,7 @@ async def build_rrids_tree_view(app, tree_view_mode="std") -> Dict[str, Any]:
reason="Currently only 'std' option for the classifiers tree view is implemented"
)

scicrunch = SciCrunchAPI.get_instance(app)
scicrunch = SciCrunch.get_instance(app)
repo = ResearchResourceRepository(app)

flat_tree_view = {}
Expand Down
106 changes: 66 additions & 40 deletions services/web/server/src/simcore_service_webserver/groups_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import json
import logging
from typing import Optional
from typing import List, Optional

from aiohttp import web

Expand All @@ -15,8 +15,8 @@
)
from .login.decorators import RQT_USERID_KEY, login_required
from .scicrunch.scicrunch_db import ResearchResourceRepository
from .scicrunch.scicrunch_models import ListOfResourceHits, ResearchResource
from .scicrunch.service_client import SciCrunchAPI
from .scicrunch.scicrunch_models import ResearchResource, ResourceHit
from .scicrunch.service_client import InvalidRRID, SciCrunch, ScicrunchError
from .security_decorators import permission_required
from .users_exceptions import UserNotFoundError

Expand Down Expand Up @@ -199,66 +199,92 @@ async def delete_group_user(request: web.Request):
@login_required
@permission_required("groups.*")
async def get_group_classifiers(request: web.Request):
gid = int(request.match_info["gid"]) # FIXME: raise http enetity error if not int
classifiers_tree_view = {}

repo = GroupClassifierRepository(request.app)
if not await repo.group_uses_scicrunch(gid):
classifiers_tree_view = await repo.get_classifiers_from_bundle(gid)
else:
classifiers_tree_view = await build_rrids_tree_view(
try:
gid = int(request.match_info["gid"])
# FIXME: Raise ValidationError and handle as bad request.
# Now middleware will convert as server error but it is a client error

repo = GroupClassifierRepository(request.app)
if not await repo.group_uses_scicrunch(gid):
return await repo.get_classifiers_from_bundle(gid)

# otherwise, build dynamic tree with RRIDs
return await build_rrids_tree_view(
request.app, tree_view_mode=request.query.get("tree_view", "std")
)

return classifiers_tree_view
except ScicrunchError:
return {}


# GET /groups/sparc/classifiers/scicrunch-resources/{rrid}
@login_required
@permission_required("groups.*")
async def get_scicrunch_resource(request: web.Request):
rrid = request.match_info["rrid"]
rrid = SciCrunchAPI.validate_identifier(rrid)
try:
rrid = request.match_info["rrid"]
rrid = SciCrunch.validate_identifier(rrid)

# check if in database first
repo = ResearchResourceRepository(request.app)
resource: Optional[ResearchResource] = await repo.get_resource(rrid)
if not resource:
# otherwise, request to scicrunch service
scicrunch = SciCrunch.get_instance(request.app)
resource = await scicrunch.get_resource_fields(rrid)

return resource.dict()

# check if in database first
repo = ResearchResourceRepository(request.app)
resource: Optional[ResearchResource] = await repo.get_resource(rrid)
if not resource:
# otherwise, request to scicrunch service
scicrunch = SciCrunchAPI.get_instance(request.app, raises=True)
scicrunch_resource = await scicrunch.get_resource_fields(rrid)
resource = scicrunch_resource.convert_to_api_model()
return resource.dict()
except InvalidRRID as err:
raise web.HTTPBadRequest(reason=err.reason) from err

except ScicrunchError as err:
user_msg = "Cannot get RRID since scicrunch.org service is not reachable."
logger.error("%s -> %s", err, user_msg)
raise web.HTTPServiceUnavailable(reason=user_msg) from err


# POST /groups/sparc/classifiers/scicrunch-resources/{rrid}
@login_required
@permission_required("groups.*")
async def add_scicrunch_resource(request: web.Request):
rrid = request.match_info["rrid"]
try:
rrid = request.match_info["rrid"]

# check if exists
repo = ResearchResourceRepository(request.app)
resource: Optional[ResearchResource] = await repo.get_resource(rrid)
if not resource:
# then request scicrunch service
scicrunch = SciCrunch.get_instance(request.app)
resource = await scicrunch.get_resource_fields(rrid)

# check if exists
repo = ResearchResourceRepository(request.app)
resource: Optional[ResearchResource] = await repo.get_resource(rrid)
if not resource:
# then request scicrunch service
scicrunch = SciCrunchAPI.get_instance(request.app, raises=True)
scicrunch_resource = await scicrunch.get_resource_fields(rrid)
resource = scicrunch_resource.convert_to_api_model()
# insert new or if exists, then update
await repo.upsert(resource)

# insert new or if exists, then update
await repo.upsert(resource)
return resource.dict()

return resource.dict()
except InvalidRRID as err:
raise web.HTTPBadRequest(reason=err.reason) from err

except ScicrunchError as err:
user_msg = "Cannot add RRID since scicrunch.org service is not reachable."
logger.error("%s -> %s", err, user_msg)
raise web.HTTPServiceUnavailable(reason=user_msg) from err


# GET /groups/sparc/classifiers/scicrunch-resources:search
@login_required
@permission_required("groups.*")
async def search_scicrunch_resources(request: web.Request):
guess_name: str = request.query["guess_name"]
try:
guess_name = str(request.query["guess_name"]).strip()

scicrunch = SciCrunch.get_instance(request.app)
hits: List[ResourceHit] = await scicrunch.search_resource(guess_name)

scicrunch = SciCrunchAPI.get_instance(request.app, raises=True)
hits: ListOfResourceHits = await scicrunch.search_resource(guess_name)
return [hit.dict() for hit in hits]

return hits.dict()["__root__"]
except ScicrunchError as err:
user_msg = "Cannot search since scicrunch.org service is not reachable."
logger.error("%s -> %s", err, user_msg)
raise web.HTTPServiceUnavailable(reason=user_msg) from err
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,6 @@
# TODO: read https://www.force11.org/group/resource-identification-initiative
SCICRUNCH_DEFAULT_URL = "https://scicrunch.org"

# To ensure they are recognizable, unique, and traceable, identifiers are prefixed with " RRID: ",
# followed by a second tag that indicates the source authority that provided it
# (e.g. "AB" for the Antibody Registry, "CVCL" for the Cellosaurus, "MMRRC" for Mutant Mouse Regional Resource Centers,
# "SCR" for the SciCrunch registry of tools).
# SEE https://scicrunch.org/resources

STRICT_RRID_PATTERN = r"(RRID:)\s*(SCR_\d+)"
RRID_PATTERN = r"(RRID:)?\s*(SCR_\d+)"


class SciCrunchSettings(BaseSettings):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,61 @@
"""

import logging
import re
from datetime import datetime
from typing import Any, Dict, List, Union

from pydantic import BaseModel, Field, constr, validator
from yarl import URL

from ._config import STRICT_RRID_PATTERN

logger = logging.getLogger(__name__)


# Research Resource Identifiers --------------------------------
#
# To ensure they are recognizable, unique, and traceable,
# identifiers are prefixed with " RRID:",
# followed by a second tag that indicates the source authority that provided it:
#
# "AB" for the Antibody Registry,
# "CVCL" for the Cellosaurus,
# "MMRRC" for Mutant Mouse Regional Resource Centers,
# "SCR" for the SciCrunch registry of tools
#
# SEE https://scicrunch.org/resources

STRICT_RRID_PATTERN = r"^(RRID:)([^_\s]+)_(\S+)$" # Expected in db labels and models

# Lenient pattern used to parse incoming tags: the "RRID:" prefix is optional
# and may be separated from the rest by whitespace. The remainder is split at
# the first underscore into <source authority>_<identifier>.
RRID_TAG_PATTERN = r"(RRID:)?\s*([^:_\s]+)_(\S+)"
rrid_capture_re = re.compile(RRID_TAG_PATTERN)


def normalize_rrid_tags(rrid_tag: str, *, with_prefix: bool = True) -> str:
    """Parse a RRID tag and return it in normalized form.

    The first occurrence of ``RRID_TAG_PATTERN`` found anywhere in
    ``rrid_tag`` is used, so an optional ``RRID:`` prefix and whitespace
    after it are tolerated.

    Args:
        rrid_tag: text containing a tag such as ``SCR_018997`` or
            ``RRID: SCR_018997``.
        with_prefix: when True (default) the result is prefixed with
            ``RRID:``; otherwise only ``<authority>_<identifier>`` is returned.

    Returns:
        ``RRID:<authority>_<identifier>`` or ``<authority>_<identifier>``.

    Raises:
        ValueError: if ``rrid_tag`` contains nothing matching the pattern.
    """
    # Check the match explicitly instead of relying on the AttributeError
    # raised by ``None.groups()``: this keeps the traceback free of an
    # unrelated chained exception and does not hide genuine AttributeErrors.
    match = rrid_capture_re.search(rrid_tag)
    if match is None:
        raise ValueError(f"'{rrid_tag}' does not match a RRID pattern")

    # The first group is the optional "RRID:" prefix, which is re-added below
    # in canonical form (no whitespace after the colon).
    _, source_authority, identifier = match.groups()
    rrid = f"{source_authority}_{identifier}"
    return f"RRID:{rrid}" if with_prefix else rrid


# webserver API models -----------------------------------------
class ResearchResource(BaseModel):
rrid: constr(
regex=STRICT_RRID_PATTERN
) # unique identifier used as classifier, i.e. to tag studies and services
rrid: constr(regex=STRICT_RRID_PATTERN) = Field(
...,
description="Unique identifier used as classifier, i.e. to tag studies and services",
)
name: str
description: str

@validator("rrid", pre=True)
@classmethod
def format_rrid(cls, v):
if not v.startswith("RRID:"):
return f"RRID: {v}"
return v
return normalize_rrid_tags(v, with_prefix=True)

class Config:
orm_mode = True
Expand All @@ -47,22 +77,14 @@ class ResearchResourceAtdB(ResearchResource):
class FieldItem(BaseModel):
field_name: str = Field(..., alias="field")
required: bool
# field_type: str = Field(..., alias="type") # text, textarea, resource-types, ...
# max_number: str # convertable to int
value: Union[str, None, List[Any]] = None
# position: int
# display: str # title, descripiotn, url, text, owner-text
alt: str # alternative text


class ResourceView(BaseModel):
resource_fields: List[FieldItem] = Field([], alias="fields")
version: int
curation_status: str
last_curated_version: int
# uuid: UUID
# NOTE: image_src is a path from https://scicrunch.org/ e.g. https://scicrunch.org/upload/resource-images/18997.png
# image_src: Optional[str]
scicrunch_id: str

@classmethod
Expand All @@ -74,8 +96,6 @@ def from_response_payload(cls, payload: Dict):
def is_curated(self) -> bool:
return self.curation_status.lower() == "curated"

# TODO: add validator to capture only selected fields

def _get_field(self, fieldname: str):
for field in self.resource_fields:
if field.field_name == fieldname:
Expand All @@ -91,17 +111,9 @@ def get_description(self):
def get_resource_url(self):
return URL(str(self._get_field("Resource URL")))

def convert_to_api_model(self) -> ResearchResource:
return ResearchResource(
rrid=self.scicrunch_id,
name=self.get_name(),
description=self.get_description(),
)


class ResourceHit(BaseModel):
rrid: str = Field(..., alias="rid")
# original_id: str
name: str


Expand Down
Loading