Skip to content

Commit b253cb0

Browse files
GitHK (Andrei Neagu)
and
Andrei Neagu
authored
♻️ export data feature enhancements (#7498)
Co-authored-by: Andrei Neagu <[email protected]>
1 parent e838468 commit b253cb0

File tree

6 files changed

+107
-14
lines changed

6 files changed

+107
-14
lines changed

services/storage/src/simcore_service_storage/exceptions/errors.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from common_library.errors_classes import OsparcErrorMixin
22

33

4-
class StorageRuntimeError(OsparcErrorMixin, RuntimeError):
5-
...
4+
class StorageRuntimeError(OsparcErrorMixin, RuntimeError): ...
65

76

87
class ConfigurationError(StorageRuntimeError):
@@ -45,3 +44,7 @@ class InvalidFileIdentifierError(AccessLayerError):
4544

4645
class DatCoreCredentialsMissingError(StorageRuntimeError):
4746
msg_template: str = "DatCore credentials are incomplete. TIP: Check your settings"
47+
48+
49+
class SelectionNotAllowedError(StorageRuntimeError):
    """Raised when an export selection mixes items from different folders."""

    # Filled by the OsparcErrorMixin machinery with the `selection` keyword
    # supplied at raise time (see the raise site in simcore_s3_dsm.py).
    msg_template: str = "Selection='{selection}' must be from the same folder"

services/storage/src/simcore_service_storage/simcore_s3_dsm.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
LinkAlreadyExistsError,
6262
ProjectAccessRightError,
6363
ProjectNotFoundError,
64+
SelectionNotAllowedError,
6465
)
6566
from .models import (
6667
DatasetMetaData,
@@ -81,9 +82,11 @@
8182
from .modules.s3 import get_s3_client
8283
from .utils.s3_utils import S3TransferDataCB
8384
from .utils.simcore_s3_dsm_utils import (
85+
UserSelectionStr,
8486
compute_file_id_prefix,
8587
create_and_upload_export,
8688
create_random_export_name,
89+
ensure_user_selection_from_same_base_directory,
8790
expand_directory,
8891
get_accessible_project_ids,
8992
get_directory_file_id,
@@ -1249,7 +1252,11 @@ async def create_s3_export(
12491252
*,
12501253
progress_bar: ProgressBarData,
12511254
) -> StorageFileID:
1252-
source_object_keys: set[StorageFileID] = set()
1255+
source_object_keys: set[tuple[UserSelectionStr, StorageFileID]] = set()
1256+
1257+
# ensure all selected items have the same parent
1258+
if not ensure_user_selection_from_same_base_directory(object_keys):
1259+
raise SelectionNotAllowedError(selection=object_keys)
12531260

12541261
# check access rights
12551262
for object_key in object_keys:
@@ -1279,7 +1286,7 @@ async def create_s3_export(
12791286
self.simcore_bucket_name, object_key
12801287
):
12811288
for entry in meta_data_files:
1282-
source_object_keys.add(entry.object_key)
1289+
source_object_keys.add((object_key, entry.object_key))
12831290

12841291
_logger.debug(
12851292
"User selection '%s' includes '%s' files",

services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from contextlib import suppress
22
from pathlib import Path
3+
from typing import TypeAlias
34
from uuid import uuid4
45

56
import orjson
67
from aws_library.s3 import S3MetaData, SimcoreS3API
78
from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE
9+
from aws_library.s3._models import S3ObjectKey
810
from models_library.api_schemas_storage.storage_schemas import S3BucketName
911
from models_library.projects import ProjectID
1012
from models_library.projects_nodes_io import (
@@ -143,20 +145,40 @@ def create_random_export_name(user_id: UserID) -> StorageFileID:
143145
)
144146

145147

148+
def ensure_user_selection_from_same_base_directory(
    object_keys: Iterable[str],
) -> bool:
    """Return ``True`` when every selected object key has the same parent folder.

    Generalized to accept any iterable of S3 object keys (keys are plain
    strings, so the project-specific ``S3ObjectKey`` alias is not required
    here — this keeps the utility decoupled and independently testable).

    An empty selection is vacuously valid and returns ``True``.
    """
    # Two keys belong to the same folder iff their Path parents coincide;
    # a set collapses duplicates, so "same folder" means at most one parent.
    unique_parents = {Path(key).parent for key in object_keys}
    return len(unique_parents) <= 1
153+
154+
155+
UserSelectionStr: TypeAlias = str
156+
157+
158+
def _base_path_parent(base_path: UserSelectionStr, s3_object: S3ObjectKey) -> str:
159+
base_path_parent_path = Path(base_path).parent
160+
s3_object_path = Path(s3_object)
161+
if base_path_parent_path == s3_object_path:
162+
return s3_object_path.name
163+
164+
result = s3_object_path.relative_to(base_path_parent_path)
165+
return f"{result}"
166+
167+
146168
async def create_and_upload_export(
147169
s3_client: SimcoreS3API,
148170
bucket: S3BucketName,
149171
*,
150-
source_object_keys: set[StorageFileID],
172+
source_object_keys: set[tuple[UserSelectionStr, StorageFileID]],
151173
destination_object_keys: StorageFileID,
152174
progress_bar: ProgressBarData,
153175
) -> None:
154176
archive_entries: ArchiveEntries = [
155177
(
156-
s3_object,
178+
_base_path_parent(selection, s3_object),
157179
await s3_client.get_bytes_streamer_from_object(bucket, s3_object),
158180
)
159-
for s3_object in source_object_keys
181+
for (selection, s3_object) in source_object_keys
160182
]
161183

162184
async with progress_bar:

services/storage/tests/unit/test_rpc_handlers_simcore_s3.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -592,17 +592,20 @@ async def test_start_export_data(
592592
):
593593
_, src_projects_list = await random_project_with_files(project_params)
594594

595-
paths_to_export: set[SimcoreS3FileID] = set()
595+
all_available_files: set[SimcoreS3FileID] = set()
596596
for x in src_projects_list.values():
597-
paths_to_export |= x.keys()
597+
all_available_files |= x.keys()
598+
599+
nodes_in_project_to_export = {
600+
TypeAdapter(PathToExport).validate_python("/".join(Path(x).parts[0:2]))
601+
for x in all_available_files
602+
}
598603

599604
result = await _request_start_export_data(
600605
storage_rabbitmq_rpc_client,
601606
user_id,
602607
product_name,
603-
paths_to_export=[
604-
TypeAdapter(PathToExport).validate_python(x) for x in paths_to_export
605-
],
608+
paths_to_export=list(nodes_in_project_to_export),
606609
)
607610

608611
assert re.fullmatch(

services/storage/tests/unit/test_simcore_s3_dsm.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,11 @@ async def test_create_s3_export(
243243
cleanup_files_closure: Callable[[SimcoreS3FileID], None],
244244
):
245245
initial_fmd_count = await _get_fmds_count(sqlalchemy_async_engine)
246-
selection_to_export = _get_folder_and_files_selection(paths_for_export)
246+
all_files_to_export = _get_folder_and_files_selection(paths_for_export)
247+
selection_to_export = {
248+
S3ObjectKey(project_id)
249+
for project_id in {Path(p).parents[-2] for p in all_files_to_export}
250+
}
247251

248252
reports: list[ProgressReport] = []
249253

services/storage/tests/unit/test_simcore_s3_dsm_utils.py

+55-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
from pathlib import Path
2+
13
import pytest
2-
from simcore_service_storage.utils.simcore_s3_dsm_utils import compute_file_id_prefix
4+
from aws_library.s3._models import S3ObjectKey
5+
from simcore_service_storage.utils.simcore_s3_dsm_utils import (
6+
UserSelectionStr,
7+
_base_path_parent,
8+
compute_file_id_prefix,
9+
ensure_user_selection_from_same_base_directory,
10+
)
311

412

513
@pytest.mark.parametrize(
@@ -19,3 +27,49 @@
1927
)
2028
def test_compute_file_id_prefix(file_id, levels, expected):
2129
assert compute_file_id_prefix(file_id, levels) == expected
30+
31+
32+
# Shared base directory reused across the parametrized cases below.
_FOLDERS_PATH = Path("nested/folders/path")


@pytest.mark.parametrize(
    "selection, s3_object, expected",
    [
        ("single_file", "single_file", "single_file"),
        ("single_folder", "single_folder", "single_folder"),
        ("a/b/c", "a/b/c/d/e/f/g", "c/d/e/f/g"),
        (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"),
        (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"),
        (_FOLDERS_PATH, _FOLDERS_PATH / "with/some/content", "path/with/some/content"),
    ],
)
def test__base_path_parent(selection: Path | str, s3_object: Path, expected: str):
    # The helper should strip everything above the selection's parent folder,
    # keeping the selected item's own name as the first path component.
    assert (
        _base_path_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}"))
        == expected
    )
51+
52+
53+
@pytest.mark.parametrize(
    "user_selection, expected",
    [
        # valid selections: zero/one item, duplicates, or siblings in one folder
        ([], True),
        (["folder"], True),
        (["folder", "folder"], True),
        (["", ""], True),
        ([""], True),
        ([_FOLDERS_PATH / "a", _FOLDERS_PATH / "b"], True),
        (["a.txt", "b.txt"], True),
        (["a/a.txt"], True),
        # not same parent
        (["firsta/file", "second/file"], False),
        (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False),
    ],
)
def test_ensure_user_selection_from_same_base_directory(
    user_selection: list[S3ObjectKey | Path], expected: bool
):
    # Paths are stringified here because the production entrypoint receives
    # S3 object keys as strings, not pathlib.Path objects.
    assert (
        ensure_user_selection_from_same_base_directory([f"{x}" for x in user_selection])
        == expected
    )

0 commit comments

Comments
 (0)