Skip to content

♻️ export data feature enhancements #7498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from common_library.errors_classes import OsparcErrorMixin


class StorageRuntimeError(OsparcErrorMixin, RuntimeError):
...
class StorageRuntimeError(OsparcErrorMixin, RuntimeError): ...


class ConfigurationError(StorageRuntimeError):
Expand Down Expand Up @@ -45,3 +44,7 @@ class InvalidFileIdentifierError(AccessLayerError):

class DatCoreCredentialsMissingError(StorageRuntimeError):
    """Raised when the DatCore API token/secret settings are not fully configured."""

    msg_template: str = "DatCore credentials are incomplete. TIP: Check your settings"


class SelectionNotAllowedError(StorageRuntimeError):
    """Raised when the object keys selected for export do not all share one parent folder."""

    msg_template: str = "Selection='{selection}' must share the same parent folder"
11 changes: 9 additions & 2 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
LinkAlreadyExistsError,
ProjectAccessRightError,
ProjectNotFoundError,
SelectionNotAllowedError,
)
from .models import (
DatasetMetaData,
Expand All @@ -81,9 +82,11 @@
from .modules.s3 import get_s3_client
from .utils.s3_utils import S3TransferDataCB
from .utils.simcore_s3_dsm_utils import (
UserSelection,
compute_file_id_prefix,
create_and_upload_export,
create_random_export_name,
ensure_same_parent_in_user_selection,
expand_directory,
get_accessible_project_ids,
get_directory_file_id,
Expand Down Expand Up @@ -1249,7 +1252,11 @@ async def create_s3_export(
*,
progress_bar: ProgressBarData,
) -> StorageFileID:
source_object_keys: set[StorageFileID] = set()
source_object_keys: set[tuple[UserSelection, StorageFileID]] = set()

# ensure all selected items have the same parent
if not ensure_same_parent_in_user_selection(object_keys):
raise SelectionNotAllowedError(selection=object_keys)

# check access rights
for object_key in object_keys:
Expand Down Expand Up @@ -1279,7 +1286,7 @@ async def create_s3_export(
self.simcore_bucket_name, object_key
):
for entry in meta_data_files:
source_object_keys.add(entry.object_key)
source_object_keys.add((object_key, entry.object_key))

_logger.debug(
"User selection '%s' includes '%s' files",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from contextlib import suppress
from pathlib import Path
from typing import TypeAlias
from uuid import uuid4

import orjson
from aws_library.s3 import S3MetaData, SimcoreS3API
from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE
from aws_library.s3._models import S3ObjectKey
from models_library.api_schemas_storage.storage_schemas import S3BucketName
from models_library.projects import ProjectID
from models_library.projects_nodes_io import (
Expand Down Expand Up @@ -143,20 +145,38 @@ def create_random_export_name(user_id: UserID) -> StorageFileID:
)


def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool:
    """Return True when every selected object key resolves to one shared parent folder.

    An empty selection trivially satisfies the condition.
    """
    unique_parents = {Path(key).parent for key in object_keys}
    return len(unique_parents) <= 1


UserSelection: TypeAlias = str


def _strip_parent(selection: UserSelection, s3_object: S3ObjectKey) -> str:
selection_path = Path(selection)
s3_object_path = Path(s3_object)
if selection_path == s3_object_path:
return s3_object_path.name

result = s3_object_path.relative_to(selection_path)
return f"{result}"


async def create_and_upload_export(
s3_client: SimcoreS3API,
bucket: S3BucketName,
*,
source_object_keys: set[StorageFileID],
source_object_keys: set[tuple[UserSelection, StorageFileID]],
destination_object_keys: StorageFileID,
progress_bar: ProgressBarData,
) -> None:
archive_entries: ArchiveEntries = [
(
s3_object,
_strip_parent(selection, s3_object),
await s3_client.get_bytes_streamer_from_object(bucket, s3_object),
)
for s3_object in source_object_keys
for (selection, s3_object) in source_object_keys
]

async with progress_bar:
Expand Down
55 changes: 54 additions & 1 deletion services/storage/tests/unit/test_simcore_s3_dsm_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
from pathlib import Path

import pytest
from simcore_service_storage.utils.simcore_s3_dsm_utils import compute_file_id_prefix
from aws_library.s3._models import S3ObjectKey
from simcore_service_storage.utils.simcore_s3_dsm_utils import (
UserSelection,
_strip_parent,
compute_file_id_prefix,
ensure_same_parent_in_user_selection,
)


@pytest.mark.parametrize(
Expand All @@ -19,3 +27,48 @@
)
def test_compute_file_id_prefix(file_id, levels, expected):
assert compute_file_id_prefix(file_id, levels) == expected


_FOLDERS_PATH = Path("nested/folders/path")


@pytest.mark.parametrize(
    "selection, s3_object, expected",
    [
        (Path("single_file"), Path("single_file"), "single_file"),
        (Path("single_folder"), Path("single_folder"), "single_folder"),
        (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"),
        (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"),
        (_FOLDERS_PATH, _FOLDERS_PATH / "the/actual/path", "the/actual/path"),
    ],
)
def test__strip_parent(selection: Path, s3_object: Path, expected: str):
    """The selection prefix is removed from the object key (name only when equal)."""
    stripped = _strip_parent(UserSelection(f"{selection}"), S3ObjectKey(f"{s3_object}"))
    assert stripped == expected


@pytest.mark.parametrize(
    "user_selection, expected",
    [
        ([], True),
        (["folder"], True),
        (["folder", "folder"], True),
        (["", ""], True),
        ([""], True),
        ([_FOLDERS_PATH / "a", _FOLDERS_PATH / "b"], True),
        (["a.txt", "b.txt"], True),
        (["a/a.txt"], True),
        # not same parent
        (["firsta/file", "second/file"], False),
        (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False),
    ],
)
def test_ensure_same_parent_in_user_selection(
    user_selection: list[S3ObjectKey | Path], expected: bool
):
    """A selection is valid only when all keys share a single parent folder.

    NOTE: fixes the misspelled parametrize id/parameter `user_slection`.
    """
    assert (
        ensure_same_parent_in_user_selection([f"{x}" for x in user_selection])
        == expected
    )
Loading