Skip to content

oold and opensemantic package integration #107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ package_dir =
# new major versions. This works if the required packages follow Semantic Versioning.
# For more information, check out https://semver.org/.
install_requires =
oold
opensemantic
pydantic>=1.10.17
datamodel-code-generator>=0.25
mwclient>=0.11.0
Expand All @@ -68,7 +70,6 @@ install_requires =
asyncio
tqdm
pybars3-wheel
backports.strenum; python_version<"3.11"

[options.packages.find]
where = src
Expand Down
2 changes: 1 addition & 1 deletion src/osw/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from warnings import warn

import yaml
from opensemantic import OswBaseModel
from pydantic.v1 import PrivateAttr

from osw.defaults import paths as default_paths
from osw.model.static import OswBaseModel

if TYPE_CHECKING:
PossibleFilePath = Path
Expand Down
2 changes: 1 addition & 1 deletion src/osw/controller/database.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Optional, Union

from opensemantic import OswBaseModel
from sqlalchemy import URL, create_engine
from sqlalchemy import text as sql_text
from sqlalchemy.engine import Engine

import osw.model.entity as model
from osw.auth import CredentialManager
from osw.core import OSW
from osw.model.static import OswBaseModel


class DatabaseController(model.Database):
Expand Down
2 changes: 1 addition & 1 deletion src/osw/controller/page_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
from typing import Optional, Union
from warnings import warn

from opensemantic import OswBaseModel
from pydantic.v1 import FilePath
from typing_extensions import Dict, List

import osw.model.page_package as model
from osw.auth import CredentialManager
from osw.model import page_package as package
from osw.model.page_package import NAMESPACE_CONST_TO_NAMESPACE_MAPPING
from osw.model.static import OswBaseModel
from osw.utils.regex import RegExPatternExtended
from osw.wtsite import WtPage, WtSite

Expand Down
159 changes: 144 additions & 15 deletions src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,22 @@
import rdflib
from jsonpath_ng.ext import parse
from mwclient.client import Site
from oold.generator import Generator
from oold.model.v1 import (
ResolveParam,
Resolver,
ResolveResult,
SetResolverParam,
set_resolver,
)
from oold.utils.codegen import OOLDJsonSchemaParser
from opensemantic import OswBaseModel
from pydantic import PydanticDeprecatedSince20
from pydantic.v1 import BaseModel, Field, PrivateAttr, create_model, validator
from pyld import jsonld

import osw.model.entity as model
from osw.defaults import params as default_params
from osw.model.static import OswBaseModel
from osw.utils.oold import (
AggregateGeneratedSchemasParam,
AggregateGeneratedSchemasParamMode,
Expand Down Expand Up @@ -99,6 +108,30 @@ class Config:

site: WtSite

def __init__(self, **data: Any):
    """Initialise the instance and register it as resolver backend.

    After the regular parent initialisation, a resolver that loads entities
    through ``load_entity`` of this instance is registered with
    ``set_resolver`` for the IRIs "Item" and "Category".

    Parameters
    ----------
    data
        keyword arguments forwarded unchanged to the parent constructor
    """
    super().__init__(**data)

    # implement resolver backend with osw.load_entity
    class MyResolver(Resolver):

        osw_obj: OSW

        def resolve(self, request: ResolveParam):
            """Load the requested IRIs and return them as ``ResolveResult``.

            The result maps every requested IRI to the entity loaded at the
            same position of the ``load_entity`` result.
            """
            iris = list(request.iris)
            entities = list(
                self.osw_obj.load_entity(OSW.LoadEntityParam(titles=iris)).entities
            )
            if len(entities) != len(iris):
                # zip() stops at the shorter sequence, so a differing count
                # would silently drop or mis-pair entries - make it visible
                warnings.warn(
                    f"Requested {len(iris)} IRIs but loaded {len(entities)} "
                    "entities; the IRI to entity mapping may be incomplete "
                    "or misaligned"
                )
            # pair IRIs and entities by position
            return ResolveResult(nodes=dict(zip(iris, entities)))

    r = MyResolver(osw_obj=self)
    # NOTE(review): this registration happens on every instantiation;
    # presumably a later instance replaces the resolver of an earlier one
    # - confirm against the oold set_resolver semantics
    set_resolver(SetResolverParam(iri="Item", resolver=r))
    set_resolver(SetResolverParam(iri="Category", resolver=r))

@property
def mw_site(self) -> Site:
"""Returns the mwclient Site object of the OSW instance."""
Expand Down Expand Up @@ -348,6 +381,17 @@ class FetchSchemaParam(BaseModel):
)
legacy_generator: Optional[bool] = False
"""uses legacy command line for code generation if true"""
generate_annotations: Optional[bool] = True
"""generate custom schema keywords in Fields and Classes.
Required to update the schema in OSW without information loss"""
offline_pages: Optional[Dict[str, WtPage]] = None
"""pages to be used offline instead of fetching them from the OSW instance"""
result_model_path: Optional[Union[str, pathlib.Path]] = None
"""path to the generated model file, if None,
the default path ./model/entity.py is used"""

class Config:
arbitrary_types_allowed = True

def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None:
"""Loads the given schemas from the OSW instance and auto-generates python
Expand All @@ -370,6 +414,9 @@ def fetch_schema(self, fetchSchemaParam: FetchSchemaParam = None) -> None:
schema_title=schema_title,
mode=mode,
legacy_generator=fetchSchemaParam.legacy_generator,
generate_annotations=fetchSchemaParam.generate_annotations,
offline_pages=fetchSchemaParam.offline_pages,
result_model_path=fetchSchemaParam.result_model_path,
)
)
first = False
Expand All @@ -396,6 +443,19 @@ class _FetchSchemaParam(BaseModel):
)
legacy_generator: Optional[bool] = False
"""uses legacy command line for code generation if true"""
generate_annotations: Optional[bool] = False
"""generate custom schema keywords in Fields and Classes.
Required to update the schema in OSW without information loss"""
offline_pages: Optional[Dict[str, WtPage]] = None
"""pages to be used offline instead of fetching them from the OSW instance"""
result_model_path: Optional[Union[str, pathlib.Path]] = None
"""path to the generated model file, if None,
the default path ./model/entity.py is used"""
fetched_schema_titles: Optional[List[str]] = []
"""keep track of fetched schema titles to prevent recursion"""

class Config:
arbitrary_types_allowed = True

def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
"""Loads the given schema from the OSW instance and autogenerates python
Expand All @@ -411,12 +471,23 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
if fetchSchemaParam is None:
fetchSchemaParam = OSW._FetchSchemaParam()
schema_title = fetchSchemaParam.schema_title
fetchSchemaParam.fetched_schema_titles.append(schema_title)
root = fetchSchemaParam.root
schema_name = schema_title.split(":")[-1]
page = self.site.get_page(WtSite.GetPageParam(titles=[schema_title])).pages[0]
if not page.exists:
print(f"Error: Page {schema_title} does not exist")
return
if (
fetchSchemaParam.offline_pages is not None
and schema_title in fetchSchemaParam.offline_pages
):
print(f"Fetch {schema_title} from offline pages")
page = fetchSchemaParam.offline_pages[schema_title]
else:
print(f"Fetch {schema_title} from online pages")
page = self.site.get_page(WtSite.GetPageParam(titles=[schema_title])).pages[
0
]
if not page.exists:
print(f"Error: Page {schema_title} does not exist")
return
# note: only in the JsonSchema namespace is the schema located in the main slot;
# in all other namespaces, the json_schema slot is used
if schema_title.startswith("JsonSchema:"):
Expand All @@ -433,6 +504,12 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
if (schema_str is None) or (schema_str == ""):
print(f"Error: Schema {schema_title} does not exist")
schema_str = "{}" # empty schema to make reference work

generator = Generator()
schemas_for_preprocessing = [json.loads(schema_str)]
generator.preprocess(schemas_for_preprocessing)
schema_str = json.dumps(schemas_for_preprocessing[0])

schema = json.loads(
schema_str.replace("$ref", "dollarref").replace(
# '$' is a special char for root object in jsonpath
Expand All @@ -441,7 +518,6 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
)
# fix https://github.com/koxudaxi/datamodel-code-generator/issues/1910
)
print(f"Fetch {schema_title}")

jsonpath_expr = parse("$..dollarref")
for match in jsonpath_expr.find(schema):
Expand All @@ -461,10 +537,12 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
# print(f"replace {match.value} with {value}")
if (
ref_schema_title != schema_title
and ref_schema_title not in fetchSchemaParam.fetched_schema_titles
): # prevent recursion in case of self references
self._fetch_schema(
OSW._FetchSchemaParam(schema_title=ref_schema_title, root=False)
) # resolve references recursive
_param = fetchSchemaParam.copy()
_param.root = False
_param.schema_title = ref_schema_title
self._fetch_schema(_param) # resolve references recursive

model_dir_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "model"
Expand All @@ -480,6 +558,10 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:

# result_model_path = schema_path.replace(".json", ".py")
result_model_path = os.path.join(model_dir_path, "entity.py")
if fetchSchemaParam.result_model_path:
result_model_path = fetchSchemaParam.result_model_path
if not isinstance(result_model_path, str):
result_model_path = str(result_model_path)
temp_model_path = os.path.join(model_dir_path, "temp.py")
if root:
if fetchSchemaParam.legacy_generator:
Expand All @@ -505,7 +587,7 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
--input {schema_path} \
--input-file-type jsonschema \
--output {temp_model_path} \
--base-class osw.model.static.OswBaseModel \
--base-class opensemantic.OswBaseModel \
--use-default \
--use-unique-items-as-set \
--enum-field-as-literal all \
Expand All @@ -522,25 +604,73 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
# suppress deprecation warnings from pydantic
# see https://github.com/koxudaxi/datamodel-code-generator/issues/2213
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)

if fetchSchemaParam.generate_annotations:
# monkey patch class
datamodel_code_generator.parser.jsonschema.JsonSchemaParser = (
OOLDJsonSchemaParser
)
datamodel_code_generator.generate(
input_=pathlib.Path(schema_path),
input_file_type="jsonschema",
output=pathlib.Path(temp_model_path),
base_class="osw.model.static.OswBaseModel",
base_class="opensemantic.OswBaseModel",
# use_default=True,
apply_default_values_for_required_fields=True,
use_unique_items_as_set=True,
enum_field_as_literal=datamodel_code_generator.LiteralType.All,
# enum_field_as_literal=datamodel_code_generator.LiteralType.All,
enum_field_as_literal="all",
# will create MyEnum(str, Enum) instead of MyEnum(Enum)
use_subclass_enum=True,
set_default_enum_member=True,
use_title_as_name=True,
use_schema_description=True,
use_field_description=True,
encoding="utf-8",
use_double_quotes=True,
collapse_root_models=True,
reuse_model=True,
field_include_all_keys=True,
)
warnings.filterwarnings("default", category=PydanticDeprecatedSince20)

# note: we could use OOLDJsonSchemaParser directly (see below),
# but datamodel_code_generator.generate
# does some pre- and postprocessing we do not want to duplicate

# data_model_type = datamodel_code_generator.DataModelType.PydanticBaseModel
# #data_model_type = DataModelType.PydanticV2BaseModel
# target_python_version = datamodel_code_generator.PythonVersion.PY_38
# data_model_types = datamodel_code_generator.model.get_data_model_types(
# data_model_type, target_python_version
# )
# parser = OOLDJsonSchemaParserFixedRefs(
# source=pathlib.Path(schema_path),

# base_class="opensemantic.OswBaseModel",
# data_model_type=data_model_types.data_model,
# data_model_root_type=data_model_types.root_model,
# data_model_field_type=data_model_types.field_model,
# data_type_manager_type=data_model_types.data_type_manager,
# target_python_version=target_python_version,

# #use_default=True,
# apply_default_values_for_required_fields=True,
# use_unique_items_as_set=True,
# enum_field_as_literal=datamodel_code_generator.LiteralType.All,
# use_title_as_name=True,
# use_schema_description=True,
# use_field_description=True,
# encoding="utf-8",
# use_double_quotes=True,
# collapse_root_models=True,
# reuse_model=True,
# #field_include_all_keys=True
# )
# result = parser.parse()
# with open(temp_model_path, "w", encoding="utf-8") as f:
# f.write(result)

# see https://koxudaxi.github.io/datamodel-code-generator/
# --base-class OswBaseModel: use a custom base class
# --custom-template-dir src/model/template_data/
Expand Down Expand Up @@ -590,8 +720,8 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
header = (
"from uuid import uuid4\n"
"from typing import Type, TypeVar\n"
"from osw.model.static import OswBaseModel, Ontology\n"
# "from osw.model.static import *\n"
"from opensemantic import OswBaseModel\n"
# "from opensemantic import *\n"
"\n"
)

Expand All @@ -614,7 +744,6 @@ def _fetch_schema(self, fetchSchemaParam: _FetchSchemaParam = None) -> None:
r"class\s*([\S]*)\s*\(\s*[\S\s]*?\s*\)\s*:.*\n"
) # match class definition [\s\S]*(?:[^\S\n]*\n){2,}
for cls in re.findall(pattern, org_content):
print(cls)
content = re.sub(
r"(class\s*"
+ cls
Expand Down
2 changes: 1 addition & 1 deletion src/osw/data/mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
#
# from pydantic import validator
#
# from osw.model.static import OswBaseModel
# from opensemantic import OswBaseModel
# from osw.utils.strings import *
8 changes: 3 additions & 5 deletions src/osw/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
from pathlib import Path
from typing import List, Union

from pydantic.v1 import PrivateAttr, validator

from osw.model.static import OswBaseModel
from pydantic.v1 import BaseModel, PrivateAttr, validator

PACKAGE_ROOT_PATH = Path(__file__).parents[2]
SRC_PATH = PACKAGE_ROOT_PATH / "src"
Expand All @@ -18,7 +16,7 @@
WIKI_DOMAIN_DEFAULT = "wiki-dev.open-semantic-lab.org"


class FilePathDefault(OswBaseModel):
class FilePathDefault(BaseModel):
"""A class to store the default file path. This is a helper class to make the
default file path, defined within this module, accessible from a calling script."""

Expand Down Expand Up @@ -53,7 +51,7 @@ def get(self):
return self._default


class Defaults(OswBaseModel):
class Defaults(BaseModel):
"""Helper class to create inheriting classes for storing default values."""

_changed: List[str] = PrivateAttr(default_factory=list)
Expand Down
2 changes: 1 addition & 1 deletion src/osw/express.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from warnings import warn

import requests
from opensemantic import OswBaseModel
from pydantic.v1 import validator
from typing_extensions import (
IO,
Expand All @@ -30,7 +31,6 @@
from osw.core import OSW, OVERWRITE_CLASS_OPTIONS, OverwriteOptions
from osw.defaults import params as default_params
from osw.defaults import paths as default_paths
from osw.model.static import OswBaseModel
from osw.utils.wiki import namespace_from_full_title, title_from_full_title
from osw.wtsite import WtSite

Expand Down
Loading
Loading