From 14dc446fa4f27461c8d9790c38aaf6f98bb1510d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 9 Apr 2025 15:29:45 +0200 Subject: [PATCH 01/13] only keep selection --- .../simcore_service_storage/simcore_s3_dsm.py | 5 ++-- .../utils/simcore_s3_dsm_utils.py | 21 ++++++++++++-- .../tests/unit/test_simcore_s3_dsm_utils.py | 29 ++++++++++++++++++- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 671d9d01cee..e843919b659 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -81,6 +81,7 @@ from .modules.s3 import get_s3_client from .utils.s3_utils import S3TransferDataCB from .utils.simcore_s3_dsm_utils import ( + UserSelection, compute_file_id_prefix, create_and_upload_export, create_random_export_name, @@ -1249,7 +1250,7 @@ async def create_s3_export( *, progress_bar: ProgressBarData, ) -> StorageFileID: - source_object_keys: set[StorageFileID] = set() + source_object_keys: set[tuple[UserSelection, StorageFileID]] = set() # check access rights for object_key in object_keys: @@ -1279,7 +1280,7 @@ async def create_s3_export( self.simcore_bucket_name, object_key ): for entry in meta_data_files: - source_object_keys.add(entry.object_key) + source_object_keys.add((object_key, entry.object_key)) _logger.debug( "User selection '%s' includes '%s' files", diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 5ebe83c486b..f3afc033d0e 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -1,10 +1,12 @@ from contextlib import suppress from pathlib import Path +from typing import TypeAlias from uuid import uuid4 import orjson from aws_library.s3 import S3MetaData, SimcoreS3API from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE +from aws_library.s3._models import S3ObjectKey from models_library.api_schemas_storage.storage_schemas import S3BucketName from models_library.projects import ProjectID from models_library.projects_nodes_io import ( @@ -143,20 +145,33 @@ def create_random_export_name(user_id: UserID) -> StorageFileID: ) +UserSelection: TypeAlias = str + + +def _strip_parent(selection: UserSelection, s3_object: S3ObjectKey) -> str: + selection_path = Path(selection) + s3_object_path = Path(s3_object) + if selection_path == s3_object_path: + return s3_object_path.name + + result = s3_object_path.relative_to(selection_path) + return f"{result}" + + async def create_and_upload_export( s3_client: SimcoreS3API, bucket: S3BucketName, *, - source_object_keys: set[StorageFileID], + source_object_keys: set[tuple[UserSelection, StorageFileID]], destination_object_keys: StorageFileID, progress_bar: ProgressBarData, ) -> None: archive_entries: ArchiveEntries = [ ( - s3_object, + _strip_parent(selection, s3_object), await s3_client.get_bytes_streamer_from_object(bucket, s3_object), ) - for s3_object in source_object_keys + for (selection, s3_object) in source_object_keys ] async with progress_bar: diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 74c79a8cf36..a7bd45032e9 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -1,5 +1,12 @@ +from pathlib import Path + import pytest -from simcore_service_storage.utils.simcore_s3_dsm_utils import compute_file_id_prefix +from aws_library.s3._models import S3ObjectKey +from simcore_service_storage.utils.simcore_s3_dsm_utils import ( + UserSelection, + _strip_parent, + compute_file_id_prefix, +) @pytest.mark.parametrize( @@ -19,3 +26,23 @@ ) def test_compute_file_id_prefix(file_id, levels, expected): assert compute_file_id_prefix(file_id, levels) == expected + + +_FOLDERS_PATH = Path("nested/folders/path") + + +@pytest.mark.parametrize( + "selection, s3_object, expected", + [ + (Path("single_file"), Path("single_file"), "single_file"), + (Path("single_folder"), Path("single_folder"), "single_folder"), + (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"), + (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"), + (_FOLDERS_PATH, _FOLDERS_PATH / "the/actual/path", "the/actual/path"), + ], +) +def test__strip_parent(selection: Path, s3_object: Path, expected: str): + assert ( + _strip_parent(UserSelection(f"{selection}"), S3ObjectKey(f"{s3_object}")) + == expected + ) From a23dab250af23d29a03f31e717cbf50004886e03 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 9 Apr 2025 16:02:32 +0200 Subject: [PATCH 02/13] ensure same parent is in palce --- .../exceptions/errors.py | 7 ++++-- .../simcore_service_storage/simcore_s3_dsm.py | 6 +++++ .../utils/simcore_s3_dsm_utils.py | 5 ++++ .../tests/unit/test_simcore_s3_dsm_utils.py | 25 +++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/exceptions/errors.py b/services/storage/src/simcore_service_storage/exceptions/errors.py index 5856a2fec5b..ecdbeef9e2f 100644 --- a/services/storage/src/simcore_service_storage/exceptions/errors.py +++ b/services/storage/src/simcore_service_storage/exceptions/errors.py @@ -1,8 +1,7 @@ from common_library.errors_classes import OsparcErrorMixin -class StorageRuntimeError(OsparcErrorMixin, RuntimeError): - ... +class StorageRuntimeError(OsparcErrorMixin, RuntimeError): ... class ConfigurationError(StorageRuntimeError): @@ -45,3 +44,7 @@ class InvalidFileIdentifierError(AccessLayerError): class DatCoreCredentialsMissingError(StorageRuntimeError): msg_template: str = "DatCore credentials are incomplete. TIP: Check your settings" + + +class SelectionNotAllowedError(StorageRuntimeError): + msg_template: str = "Selection='{selection}' must share the same parent folder" diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index e843919b659..13aad0e65e6 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -61,6 +61,7 @@ LinkAlreadyExistsError, ProjectAccessRightError, ProjectNotFoundError, + SelectionNotAllowedError, ) from .models import ( DatasetMetaData, @@ -85,6 +86,7 @@ compute_file_id_prefix, create_and_upload_export, create_random_export_name, + ensure_same_paret_in_user_selection, expand_directory, get_accessible_project_ids, get_directory_file_id, @@ -1252,6 +1254,10 @@ async def create_s3_export( ) -> StorageFileID: source_object_keys: set[tuple[UserSelection, StorageFileID]] = set() + # ensure all selected items have the same parent + if not ensure_same_paret_in_user_selection(object_keys): + raise SelectionNotAllowedError(selection=object_keys) + # check access rights for object_key in object_keys: project_id = None diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index f3afc033d0e..82fcc01d56e 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -145,6 +145,11 @@ def create_random_export_name(user_id: UserID) -> StorageFileID: ) +def ensure_same_paret_in_user_selection(object_keys: list[S3ObjectKey]) -> bool: + parents = [Path(x).parent for x in object_keys] + return len(set(parents)) <= 1 + + UserSelection: TypeAlias = str diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index a7bd45032e9..035b371455c 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -6,6 +6,7 @@ UserSelection, _strip_parent, compute_file_id_prefix, + ensure_same_paret_in_user_selection, ) @@ -46,3 +47,27 @@ def test__strip_parent(selection: Path, s3_object: Path, expected: str): _strip_parent(UserSelection(f"{selection}"), S3ObjectKey(f"{s3_object}")) == expected ) + + +@pytest.mark.parametrize( + "user_slection, expected", + [ + ([], True), + (["folder"], True), + (["folder", "folder"], True), + (["", ""], True), + ([""], True), + ([_FOLDERS_PATH / "a", _FOLDERS_PATH / "b"], True), + (["a.txt", "b.txt"], True), + (["a/a.txt"], True), + # not same parent + (["firsta/file", "second/file"], False), + (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False), + ], +) +def test_ensure_same_paret_in_user_selection( + user_slection: list[S3ObjectKey | Path], expected: bool +): + assert ( + ensure_same_paret_in_user_selection([f"{x}" for x in user_slection]) == expected + ) From f9886c7d142e06eefab483be12e4596bacb3808a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 9 Apr 2025 17:21:29 +0200 Subject: [PATCH 03/13] typo --- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 4 ++-- .../simcore_service_storage/utils/simcore_s3_dsm_utils.py | 2 +- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 13aad0e65e6..e1bf0067b78 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -86,7 +86,7 @@ compute_file_id_prefix, create_and_upload_export, create_random_export_name, - ensure_same_paret_in_user_selection, + ensure_same_parent_in_user_selection, expand_directory, get_accessible_project_ids, get_directory_file_id, @@ -1255,7 +1255,7 @@ async def create_s3_export( source_object_keys: set[tuple[UserSelection, StorageFileID]] = set() # ensure all selected items have the same parent - if not ensure_same_paret_in_user_selection(object_keys): + if not ensure_same_parent_in_user_selection(object_keys): raise SelectionNotAllowedError(selection=object_keys) # check access rights diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 82fcc01d56e..88808c63b21 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -145,7 +145,7 @@ def create_random_export_name(user_id: UserID) -> StorageFileID: ) -def ensure_same_paret_in_user_selection(object_keys: list[S3ObjectKey]) -> bool: +def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool: parents = [Path(x).parent for x in object_keys] return len(set(parents)) <= 1 diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 035b371455c..2ffdfd0d453 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -6,7 +6,7 @@ UserSelection, _strip_parent, compute_file_id_prefix, - ensure_same_paret_in_user_selection, + ensure_same_parent_in_user_selection, ) @@ -65,9 +65,10 @@ def test__strip_parent(selection: Path, s3_object: Path, expected: str): (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False), ], ) -def test_ensure_same_paret_in_user_selection( +def test_ensure_same_parent_in_user_selection( user_slection: list[S3ObjectKey | Path], expected: bool ): assert ( - ensure_same_paret_in_user_selection([f"{x}" for x in user_slection]) == expected + ensure_same_parent_in_user_selection([f"{x}" for x in user_slection]) + == expected ) From c38632e4770abfbf977cea1e4a2cde188f6382c9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:03:18 +0200 Subject: [PATCH 04/13] rename --- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 4 ++-- .../simcore_service_storage/utils/simcore_s3_dsm_utils.py | 6 +++--- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index e1bf0067b78..f6b5aff4581 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -82,7 +82,7 @@ from .modules.s3 import get_s3_client from .utils.s3_utils import S3TransferDataCB from .utils.simcore_s3_dsm_utils import ( - UserSelection, + UserSelectionStr, compute_file_id_prefix, create_and_upload_export, create_random_export_name, @@ -1252,7 +1252,7 @@ async def create_s3_export( *, progress_bar: ProgressBarData, ) -> StorageFileID: - source_object_keys: set[tuple[UserSelection, StorageFileID]] = set() + source_object_keys: set[tuple[UserSelectionStr, StorageFileID]] = set() # ensure all selected items have the same parent if not ensure_same_parent_in_user_selection(object_keys): diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 88808c63b21..5ab48a5a76f 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -150,10 +150,10 @@ def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool return len(set(parents)) <= 1 -UserSelection: TypeAlias = str +UserSelectionStr: TypeAlias = str -def _strip_parent(selection: UserSelection, s3_object: S3ObjectKey) -> str: +def _strip_parent(selection: UserSelectionStr, s3_object: S3ObjectKey) -> str: selection_path = Path(selection) s3_object_path = Path(s3_object) if selection_path == s3_object_path: @@ -167,7 +167,7 @@ async def create_and_upload_export( s3_client: SimcoreS3API, bucket: S3BucketName, *, - source_object_keys: set[tuple[UserSelection, StorageFileID]], + source_object_keys: set[tuple[UserSelectionStr, StorageFileID]], destination_object_keys: StorageFileID, progress_bar: ProgressBarData, ) -> None: diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 2ffdfd0d453..d7e793882b6 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -3,7 +3,7 @@ import pytest from aws_library.s3._models import S3ObjectKey from simcore_service_storage.utils.simcore_s3_dsm_utils import ( - UserSelection, + UserSelectionStr, _strip_parent, compute_file_id_prefix, ensure_same_parent_in_user_selection, @@ -44,7 +44,7 @@ def test_compute_file_id_prefix(file_id, levels, expected): ) def test__strip_parent(selection: Path, s3_object: Path, expected: str): assert ( - _strip_parent(UserSelection(f"{selection}"), S3ObjectKey(f"{s3_object}")) + _strip_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}")) == expected ) From d6e159c241262af4a209b3e6de29f7d7445e3746 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:13:37 +0200 Subject: [PATCH 05/13] fixed selection --- .../src/simcore_service_storage/utils/simcore_s3_dsm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 5ab48a5a76f..7bf8e92b24b 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -154,7 +154,7 @@ def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool def _strip_parent(selection: UserSelectionStr, s3_object: S3ObjectKey) -> str: - selection_path = Path(selection) + selection_path = Path(selection).parent s3_object_path = Path(s3_object) if selection_path == s3_object_path: return s3_object_path.name From 8dd644ecf3a1b398967a77127658d06db0131582 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:35:24 +0200 Subject: [PATCH 06/13] reword error --- .../storage/src/simcore_service_storage/exceptions/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/exceptions/errors.py b/services/storage/src/simcore_service_storage/exceptions/errors.py index ecdbeef9e2f..93dc85ddcd3 100644 --- a/services/storage/src/simcore_service_storage/exceptions/errors.py +++ b/services/storage/src/simcore_service_storage/exceptions/errors.py @@ -47,4 +47,4 @@ class DatCoreCredentialsMissingError(StorageRuntimeError): class SelectionNotAllowedError(StorageRuntimeError): - msg_template: str = "Selection='{selection}' must share the same parent folder" + msg_template: str = "Selection='{selection}' must be from the same folder" From 0bd89fd497914d30f564080dc70ac353bf95bd5a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:39:11 +0200 Subject: [PATCH 07/13] typo --- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index d7e793882b6..7bf5043a97e 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -50,7 +50,7 @@ def test__strip_parent(selection: Path, s3_object: Path, expected: str): @pytest.mark.parametrize( - "user_slection, expected", + "user_selection, expected", [ ([], True), (["folder"], True), @@ -66,9 +66,9 @@ def test__strip_parent(selection: Path, s3_object: Path, expected: str): ], ) def test_ensure_same_parent_in_user_selection( - user_slection: list[S3ObjectKey | Path], expected: bool + user_selection: list[S3ObjectKey | Path], expected: bool ): assert ( - ensure_same_parent_in_user_selection([f"{x}" for x in user_slection]) + ensure_same_parent_in_user_selection([f"{x}" for x in user_selection]) == expected ) From a55af67a7f885ab44539302b9d8484063e0bca8c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:43:52 +0200 Subject: [PATCH 08/13] fixed tests --- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 7bf5043a97e..21e135b16ec 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -35,14 +35,15 @@ def test_compute_file_id_prefix(file_id, levels, expected): @pytest.mark.parametrize( "selection, s3_object, expected", [ - (Path("single_file"), Path("single_file"), "single_file"), - (Path("single_folder"), Path("single_folder"), "single_folder"), + ("single_file", "single_file", "single_file"), + ("single_folder", "single_folder", "single_folder"), + ("a/b/c", "a/b/c/d/e/f/g", "c/d/e/f/g"), (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"), (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"), - (_FOLDERS_PATH, _FOLDERS_PATH / "the/actual/path", "the/actual/path"), + (_FOLDERS_PATH, _FOLDERS_PATH / "the/actual/path", "path/the/actual/path"), ], ) -def test__strip_parent(selection: Path, s3_object: Path, expected: str): +def test__strip_parent(selection: Path | str, s3_object: Path, expected: str): assert ( _strip_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}")) == expected From dfce2c39b92ed94cd4fd9c21bc0d467228f49cb7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:44:26 +0200 Subject: [PATCH 09/13] renamed example --- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 21e135b16ec..7a2aa377f93 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -40,7 +40,7 @@ def test_compute_file_id_prefix(file_id, levels, expected): ("a/b/c", "a/b/c/d/e/f/g", "c/d/e/f/g"), (_FOLDERS_PATH / "folder", _FOLDERS_PATH / "folder", "folder"), (_FOLDERS_PATH / "a_file.txt", _FOLDERS_PATH / "a_file.txt", "a_file.txt"), - (_FOLDERS_PATH, _FOLDERS_PATH / "the/actual/path", "path/the/actual/path"), + (_FOLDERS_PATH, _FOLDERS_PATH / "with/some/content", "path/with/some/content"), ], ) def test__strip_parent(selection: Path | str, s3_object: Path, expected: str): From fc810be106ab80ba03782ee9b56330411ca849b9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:48:30 +0200 Subject: [PATCH 10/13] rename --- .../utils/simcore_s3_dsm_utils.py | 10 +++++----- .../storage/tests/unit/test_simcore_s3_dsm_utils.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 7bf8e92b24b..048e74a9aaa 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -153,13 +153,13 @@ def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool UserSelectionStr: TypeAlias = str -def _strip_parent(selection: UserSelectionStr, s3_object: S3ObjectKey) -> str: - selection_path = Path(selection).parent +def _base_path_parent(base_path: UserSelectionStr, s3_object: S3ObjectKey) -> str: + base_path_parent_path = Path(base_path).parent s3_object_path = Path(s3_object) - if selection_path == s3_object_path: + if base_path_parent_path == s3_object_path: return s3_object_path.name - result = s3_object_path.relative_to(selection_path) + result = s3_object_path.relative_to(base_path_parent_path) return f"{result}" @@ -173,7 +173,7 @@ async def create_and_upload_export( ) -> None: archive_entries: ArchiveEntries = [ ( - _strip_parent(selection, s3_object), + _base_path_parent(selection, s3_object), await s3_client.get_bytes_streamer_from_object(bucket, s3_object), ) for (selection, s3_object) in source_object_keys diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 7a2aa377f93..c576057c169 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -4,7 +4,7 @@ from aws_library.s3._models import S3ObjectKey from simcore_service_storage.utils.simcore_s3_dsm_utils import ( UserSelectionStr, - _strip_parent, + _base_path_parent, compute_file_id_prefix, ensure_same_parent_in_user_selection, ) @@ -43,9 +43,9 @@ def test_compute_file_id_prefix(file_id, levels, expected): (_FOLDERS_PATH, _FOLDERS_PATH / "with/some/content", "path/with/some/content"), ], ) -def test__strip_parent(selection: Path | str, s3_object: Path, expected: str): +def test__base_path_parent(selection: Path | str, s3_object: Path, expected: str): assert ( - _strip_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}")) + _base_path_parent(UserSelectionStr(f"{selection}"), S3ObjectKey(f"{s3_object}")) == expected ) From 08bc95a0b831560ccfe7f598152f4ea0e55af67d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 08:50:08 +0200 Subject: [PATCH 11/13] rename --- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 4 ++-- .../simcore_service_storage/utils/simcore_s3_dsm_utils.py | 4 +++- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index f6b5aff4581..61657117637 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -86,7 +86,7 @@ compute_file_id_prefix, create_and_upload_export, create_random_export_name, - ensure_same_parent_in_user_selection, + ensure_user_selection_from_same_base_directory, expand_directory, get_accessible_project_ids, get_directory_file_id, @@ -1255,7 +1255,7 @@ async def create_s3_export( source_object_keys: set[tuple[UserSelectionStr, StorageFileID]] = set() # ensure all selected items have the same parent - if not ensure_same_parent_in_user_selection(object_keys): + if not ensure_user_selection_from_same_base_directory(object_keys): raise SelectionNotAllowedError(selection=object_keys) # check access rights diff --git a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py index 048e74a9aaa..5764151fdf1 100644 --- a/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/utils/simcore_s3_dsm_utils.py @@ -145,7 +145,9 @@ def create_random_export_name(user_id: UserID) -> StorageFileID: ) -def ensure_same_parent_in_user_selection(object_keys: list[S3ObjectKey]) -> bool: +def ensure_user_selection_from_same_base_directory( + object_keys: list[S3ObjectKey], +) -> bool: parents = [Path(x).parent for x in object_keys] return len(set(parents)) <= 1 diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index c576057c169..56f7d9bd92a 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -6,7 +6,7 @@ UserSelectionStr, _base_path_parent, compute_file_id_prefix, - ensure_same_parent_in_user_selection, + ensure_user_selection_from_same_base_directory, ) @@ -66,10 +66,10 @@ def test__base_path_parent(selection: Path | str, s3_object: Path, expected: str (["a/a.txt", "a.txt", "c.txt", "a/d.txt"], False), ], ) -def test_ensure_same_parent_in_user_selection( +def test_ensure_user_selection_from_same_base_directory( user_selection: list[S3ObjectKey | Path], expected: bool ): assert ( - ensure_same_parent_in_user_selection([f"{x}" for x in user_selection]) + ensure_user_selection_from_same_base_directory([f"{x}" for x in user_selection]) == expected ) From 2efadc098c207ab11e9a5561519201a3d029abdd Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 09:17:14 +0200 Subject: [PATCH 12/13] fixed test --- .../tests/unit/test_rpc_handlers_simcore_s3.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py b/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py index 3cd57155e29..d7ed04de9b9 100644 --- a/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py +++ b/services/storage/tests/unit/test_rpc_handlers_simcore_s3.py @@ -592,17 +592,20 @@ async def test_start_export_data( ): _, src_projects_list = await random_project_with_files(project_params) - paths_to_export: set[SimcoreS3FileID] = set() + all_available_files: set[SimcoreS3FileID] = set() for x in src_projects_list.values(): - paths_to_export |= x.keys() + all_available_files |= x.keys() + + nodes_in_project_to_export = { + TypeAdapter(PathToExport).validate_python("/".join(Path(x).parts[0:2])) + for x in all_available_files + } result = await _request_start_export_data( storage_rabbitmq_rpc_client, user_id, product_name, - paths_to_export=[ - TypeAdapter(PathToExport).validate_python(x) for x in paths_to_export - ], + paths_to_export=list(nodes_in_project_to_export), ) assert re.fullmatch( From d54efc758c47a66b5b92a140eacd286697d6c28c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 10 Apr 2025 09:24:47 +0200 Subject: [PATCH 13/13] refactor --- services/storage/tests/unit/test_simcore_s3_dsm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/storage/tests/unit/test_simcore_s3_dsm.py b/services/storage/tests/unit/test_simcore_s3_dsm.py index df4d00e9db2..fdde44a8663 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm.py @@ -243,7 +243,11 @@ async def test_create_s3_export( cleanup_files_closure: Callable[[SimcoreS3FileID], None], ): initial_fmd_count = await _get_fmds_count(sqlalchemy_async_engine) - selection_to_export = _get_folder_and_files_selection(paths_for_export) + all_files_to_export = _get_folder_and_files_selection(paths_for_export) + selection_to_export = { + S3ObjectKey(project_id) + for project_id in {Path(p).parents[-2] for p in all_files_to_export} + } reports: list[ProgressReport] = []