Skip to content

Commit 074f03a

Browse files
authored
✨ Allow more scicrunch RRIDs as classifiers (#2110)
Allows adding any scicrunch identifier (in addition to actual resources) as a classifier by using the scicrunch resolver entrypoint.
1 parent 4d0f014 commit 074f03a

19 files changed

+735
-289
lines changed

services/web/server/src/simcore_service_webserver/exporter/formatters/formatter_v2.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
11
import asyncio
22
import logging
3+
from collections import deque
34
from pathlib import Path
45
from typing import Optional
5-
from collections import deque
66

77
from aiohttp import web
8-
from aiopg.sa.result import ResultProxy, RowProxy
98
from aiopg.sa.engine import SAConnection
10-
11-
from simcore_postgres_database.models.scicrunch_resources import scicrunch_resources
9+
from aiopg.sa.result import ResultProxy, RowProxy
1210
from servicelib.pools import non_blocking_process_pool_executor
11+
from simcore_postgres_database.models.scicrunch_resources import scicrunch_resources
12+
from simcore_service_webserver.catalog_client import get_service
13+
from simcore_service_webserver.projects.projects_api import get_project_for_user
14+
from simcore_service_webserver.projects.projects_exceptions import ProjectsException
15+
from simcore_service_webserver.scicrunch.db import ResearchResourceRepository
1316

1417
from ..exceptions import ExporterException
15-
from .formatter_v1 import FormatterV1
1618
from .base_formatter import BaseFormatter
19+
from .formatter_v1 import FormatterV1
1720
from .sds import write_sds_directory_content
18-
from .sds.xlsx.templates.submission import SubmissionDocumentParams
19-
from .sds.xlsx.templates.dataset_description import DatasetDescriptionParams
2021
from .sds.xlsx.templates.code_description import (
21-
CodeDescriptionParams,
2222
CodeDescriptionModel,
23-
RRIDEntry,
23+
CodeDescriptionParams,
2424
InputsEntryModel,
2525
OutputsEntryModel,
26+
RRIDEntry,
2627
)
27-
28-
from simcore_service_webserver.projects.projects_exceptions import ProjectsException
29-
from simcore_service_webserver.projects.projects_api import get_project_for_user
30-
from simcore_service_webserver.catalog_client import get_service
31-
from simcore_service_webserver.scicrunch.scicrunch_db import ResearchResourceRepository
28+
from .sds.xlsx.templates.dataset_description import DatasetDescriptionParams
29+
from .sds.xlsx.templates.submission import SubmissionDocumentParams
3230

3331
log = logging.getLogger(__name__)
3432

services/web/server/src/simcore_service_webserver/groups_classifiers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from simcore_postgres_database.models.classifiers import group_classifiers
1919

2020
from .constants import APP_DB_ENGINE_KEY
21-
from .scicrunch.scicrunch_db import ResearchResourceRepository
21+
from .scicrunch.db import ResearchResourceRepository
2222
from .scicrunch.service_client import SciCrunch
2323

2424
logger = logging.getLogger(__name__)
@@ -124,7 +124,7 @@ async def build_rrids_tree_view(app, tree_view_mode="std") -> Dict[str, Any]:
124124
classifier=resource.rrid,
125125
display_name=resource.name.title(),
126126
short_description=resource.description,
127-
url=scicrunch.get_rrid_link(resource.rrid),
127+
url=scicrunch.get_resolver_web_url(resource.rrid),
128128
)
129129

130130
node = validated_item.display_name.replace(":", " ")

services/web/server/src/simcore_service_webserver/groups_handlers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@
1414
UserInsufficientRightsError,
1515
)
1616
from .login.decorators import RQT_USERID_KEY, login_required
17-
from .scicrunch.scicrunch_db import ResearchResourceRepository
18-
from .scicrunch.scicrunch_models import ResearchResource, ResourceHit
19-
from .scicrunch.service_client import InvalidRRID, SciCrunch, ScicrunchError
17+
from .scicrunch.db import ResearchResourceRepository
18+
from .scicrunch.errors import ScicrunchError
19+
from .scicrunch.models import ResearchResource, ResourceHit
20+
from .scicrunch.service_client import InvalidRRID, SciCrunch
2021
from .security_decorators import permission_required
2122
from .users_exceptions import UserNotFoundError
2223

services/web/server/src/simcore_service_webserver/scicrunch/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Submodule to interact with K-Core's https://scicrunch.org service
33
- client to validate and get info about RRIDs via scicrunch's API (service_client)
4-
- keeps validated RRIDs in pg-database (scicrunch_db)
4+
- keeps validated RRIDs in pg-database (scicrunch.db)
55
- define models for all interfaces: scicrunch API, postgres DB and webserver API (scicrunch_models)
66
77
NOTE: should have no dependencies with other modules in this service

services/web/server/src/simcore_service_webserver/scicrunch/_config.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66

77
class SciCrunchSettings(BaseSettings):
88

9-
api_base_url: HttpUrl = Field(
9+
SCICRUNCH_API_BASE_URL: HttpUrl = Field(
1010
f"{SCICRUNCH_DEFAULT_URL}/api/1",
1111
description="Base url to scicrunch API's entrypoint",
1212
)
1313

1414
# NOTE: Login in https://scicrunch.org and get API Key under My Account -> API Keys
1515
# WARNING: this needs to be setup in osparc-ops before deploying
16-
api_key: SecretStr
16+
SCICRUNCH_API_KEY: SecretStr
1717

18-
class Config:
19-
case_sensitive = False
20-
env_prefix = "SCICRUNCH_"
18+
SCICRUNCH_RESOLVER_BASE_URL: HttpUrl = Field(
19+
f"{SCICRUNCH_DEFAULT_URL}/resolver",
20+
description="Base url to scicrunch resolver entrypoint",
21+
)
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""
2+
Layer to interact with scicrunch service resolver API
3+
SEE https://scicrunch.org/resolver
4+
5+
"""
6+
7+
import logging
8+
from datetime import datetime
9+
from typing import Any, Dict, List, Optional
10+
11+
from aiohttp import ClientSession
12+
from pydantic import Field
13+
from pydantic.main import BaseModel
14+
from pydantic.types import NonNegativeInt
15+
16+
from ._config import SciCrunchSettings
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
# MODELS ---------------------------
22+
23+
# This is a partial model from the resolver response
24+
# that extracts the information we are interested in
25+
# NOTE: this model was deduced by trial-and-error
26+
#
27+
class ItemInfo(BaseModel):
    """Subset of the ``item`` section of a resolver hit."""

    # Falls back to an empty string when the payload carries no description
    description: str = ""
    name: str
    identifier: str
31+
32+
33+
class RRIDInfo(BaseModel):
    """Subset of the ``rrid`` section of a resolver hit."""

    is_unique: bool = True
    # Read from the camelCase "properCitation" key of the payload
    proper_citation: str = Field(..., alias="properCitation")
36+
37+
38+
class HitSource(BaseModel):
    """Item and RRID information carried by a single resolver hit."""

    item: ItemInfo
    rrid: RRIDInfo

    def flatten_dict(self) -> Dict[str, Any]:
        """Return the item and rrid fields merged into one flat dictionary.

        The rrid fields are merged last, so they take precedence if a key
        were ever present in both sections.
        """
        flattened = self.item.dict()
        flattened.update(self.rrid.dict())
        return flattened
45+
46+
47+
class HitDetail(BaseModel):
    """Single entry of the hits list; only its ``_source`` key is parsed."""

    source: HitSource = Field(..., alias="_source")
49+
50+
51+
class Hits(BaseModel):
    """``hits`` section of the resolver response."""

    # Number of hits as reported by the service
    total: NonNegativeInt
    hits: List[HitDetail]
54+
55+
56+
class ResolverInfo(BaseModel):
    """``resolver`` section of the resolver response."""

    uri: str
    timestamp: datetime
59+
60+
61+
class ResolverResponseBody(BaseModel):
    """Partial model of the JSON body returned by the resolver entrypoint."""

    hits: Hits
    resolver: ResolverInfo
64+
65+
66+
class ResolvedItem(BaseModel):
    """Result model for resolve_rrid: flat view of one resolver hit."""

    description: str
    name: str
    identifier: str
    is_unique: bool
    proper_citation: str
74+
75+
76+
# REQUESTS --------------------------------
77+
78+
79+
async def resolve_rrid(
    identifier: str, client: ClientSession, settings: SciCrunchSettings
) -> Optional[ResolvedItem]:
    """
    Provides an API to access the results shown by https://scicrunch.org/resolver

    - identifier: identifier to resolve; surrounding whitespace is stripped
    - client: session used to perform the GET request
    - settings: provides SCICRUNCH_RESOLVER_BASE_URL

    Returns the first hit flattened into a ResolvedItem, or None if the
    resolver reports no hits.

    The request is issued with raise_for_status=True, so an error status in
    the response raises instead of being parsed.
    """
    # Example https://scicrunch.org/resolver/RRID:AB_90755.json
    identifier = identifier.strip()
    url = f"{settings.SCICRUNCH_RESOLVER_BASE_URL}/{identifier}.json"

    async with client.get(url, raise_for_status=True) as resp:
        body = await resp.json()

    # process and simplify response
    resolved = ResolverResponseBody.parse_obj(body)
    found = resolved.hits.hits

    # 'total' and the 'hits' list are independent fields of the payload:
    # check both so that a non-zero total with an empty list returns None
    # instead of raising IndexError below
    if resolved.hits.total == 0 or not found:
        return None

    if resolved.hits.total > 1:
        logger.warning(
            "Multiple hits (%d) for '%s'. Returning first",
            resolved.hits.total,
            identifier,
        )

    return ResolvedItem.parse_obj(found[0].source.flatten_dict())
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
Functions and models to query scicrunch service REST API (https://scicrunch.org/api/)
3+
4+
- http client for API requests
5+
- Error handling:
6+
- translates network errors
7+
- translates request error codes
8+
9+
Free functions that send raw requests to the scicrunch.org API
10+
- client request context
11+
- raise_for_status=True -> Raise an aiohttp.ClientResponseError if the response status is 400 or higher
12+
- validates response and prunes using pydantic models
13+
14+
SEE test_scicrunch_service_api.py
15+
"""
16+
17+
import logging
18+
from typing import Any, Dict, List, Union
19+
20+
from aiohttp import ClientSession
21+
from pydantic import BaseModel, Field
22+
from yarl import URL
23+
24+
from ._config import SciCrunchSettings
25+
from .models import ResourceHit
26+
27+
logger = logging.getLogger(__name__)
28+
29+
30+
# MODELS --
31+
#
32+
# NOTE: These models are a truncated version of the data payload for a scicrunch response.
33+
# NOTE: Examples of complete responses can be found in test_scicrunch.py::mock_scicrunch_service_api
34+
#
35+
36+
37+
class FieldItem(BaseModel):
    """One entry of a resource's ``fields`` list."""

    # Read from the "field" key of the payload
    field_name: str = Field(..., alias="field")
    required: bool
    value: Union[str, None, List[Any]] = None
41+
42+
43+
class ResourceView(BaseModel):
    """Truncated model of a resource's data: its fields plus curation info."""

    # Read from the "fields" key of the payload
    resource_fields: List[FieldItem] = Field([], alias="fields")
    version: int
    curation_status: str
    last_curated_version: int
    scicrunch_id: str

    @classmethod
    def from_response_payload(cls, payload: Dict):
        """Build the view from a payload holding "success" and "data" keys."""
        assert payload["success"] == True  # nosec
        return cls(**payload["data"])

    @property
    def is_curated(self) -> bool:
        """True if the curation status is "curated" (case-insensitive)."""
        return self.curation_status.lower() == "curated"

    def _get_field(self, fieldname: str):
        """Return the value of the first field named `fieldname`.

        Raises ValueError if no field carries that name.
        """
        for field in self.resource_fields:
            if field.field_name == fieldname:
                return field.value
        raise ValueError(f"Cannot find expected field {fieldname}")

    def get_name(self) -> str:
        """Value of the "Resource Name" field as a string."""
        return str(self._get_field("Resource Name"))

    def get_description(self) -> str:
        """Value of the "Description" field as a string."""
        return str(self._get_field("Description"))

    def get_resource_url(self) -> URL:
        """Value of the "Resource URL" field as a yarl URL."""
        return URL(str(self._get_field("Resource URL")))
73+
74+
75+
class ListOfResourceHits(BaseModel):
    """Root model wrapping a plain list of ResourceHit items."""

    __root__: List[ResourceHit]
77+
78+
79+
# REQUESTS
80+
81+
82+
async def get_all_versions(
    unprefixed_rrid: str, client: ClientSession, settings: SciCrunchSettings
) -> List[Dict[str, Any]]:
    """Request all versions of a resource from the scicrunch API.

    - unprefixed_rrid: appended as-is to the 'resource/versions/all' path
    - client: session used to perform the GET request
    - settings: provides SCICRUNCH_API_BASE_URL and SCICRUNCH_API_KEY

    Returns the "data" entry of the response body, or an empty list when the
    body does not flag success or carries no data.

    The request is issued with raise_for_status=True, so an error status in
    the response raises instead of being parsed.
    """
    async with client.get(
        f"{settings.SCICRUNCH_API_BASE_URL}/resource/versions/all/{unprefixed_rrid}",
        params={"key": settings.SCICRUNCH_API_KEY.get_secret_value()},
        raise_for_status=True,
    ) as resp:
        body = await resp.json()
        if not body.get("success"):
            return []
        # a successful body with a missing/null "data" must still honor the
        # declared list return type instead of leaking None to the caller
        return body.get("data") or []
92+
93+
94+
async def get_resource_fields(
    rrid: str, client: ClientSession, settings: SciCrunchSettings
) -> ResourceView:
    """Request the fields view of a resource and parse it into a ResourceView.

    The request is issued with raise_for_status=True, so an error status in
    the response raises instead of being parsed.
    """
    endpoint = f"{settings.SCICRUNCH_API_BASE_URL}/resource/fields/view/{rrid}"
    query = {"key": settings.SCICRUNCH_API_KEY.get_secret_value()}

    async with client.get(endpoint, params=query, raise_for_status=True) as resp:
        body = await resp.json()

    assert body.get("success")  # nosec
    data = body.get("data", {})
    return ResourceView(**data)
106+
107+
108+
async def autocomplete_by_name(
    guess_name: str, client: ClientSession, settings: SciCrunchSettings
) -> ListOfResourceHits:
    """Query the autocomplete endpoint on the "Resource Name" field.

    The guessed name is stripped of surrounding whitespace before it is sent.
    The request is issued with raise_for_status=True, so an error status in
    the response raises instead of being parsed.
    """
    endpoint = f"{settings.SCICRUNCH_API_BASE_URL}/resource/fields/autocomplete"
    query = {
        "key": settings.SCICRUNCH_API_KEY.get_secret_value(),
        "field": "Resource Name",
        "value": guess_name.strip(),
    }

    async with client.get(endpoint, params=query, raise_for_status=True) as resp:
        body = await resp.json()
        assert body.get("success")  # nosec
        hits = body.get("data", [])
        return ListOfResourceHits.parse_obj(hits)

services/web/server/src/simcore_service_webserver/scicrunch/scicrunch_db.py renamed to services/web/server/src/simcore_service_webserver/scicrunch/db.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from simcore_postgres_database.models.scicrunch_resources import scicrunch_resources
1313
from sqlalchemy.dialects.postgresql import insert as sa_pg_insert
1414

15-
from .scicrunch_models import ResearchResource, ResearchResourceAtdB
15+
from .models import ResearchResource, ResearchResourceAtdB
1616

1717
logger = logging.getLogger(__name__)
1818

0 commit comments

Comments
 (0)