♻️🐛 storage provides presigned download links for files in folders #4573
Merged 31 commits on Aug 8, 2023
Changes from 21 commits
@@ -1,9 +1,7 @@
-from typing import Optional
-
 from pydantic import ByteSize


-def byte_size_ids(val) -> Optional[str]:
+def byte_size_ids(val) -> str | None:
     if isinstance(val, ByteSize):
         return val.human_readable()
     return None
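Side note: `Optional[str]` to `str | None` is just the PEP 604 spelling of the same type. Judging by its contract (return a human-readable id, or `None` to fall back to the default), `byte_size_ids` reads like a `pytest.mark.parametrize` `ids=` callback; a usage sketch under that assumption (test body invented):

```python
import pytest
from pydantic import ByteSize, parse_obj_as

@pytest.mark.parametrize(
    "size",
    [parse_obj_as(ByteSize, "5MiB"), parse_obj_as(ByteSize, "1GiB")],
    ids=byte_size_ids,  # ByteSize params get ids like "5.0MiB"; others keep defaults
)
def test_size_is_positive(size: ByteSize) -> None:
    assert size > 0
```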
services/storage/src/simcore_service_storage/s3_client.py (36 changes: 19 additions & 17 deletions)
@@ -2,10 +2,11 @@
 import json
 import logging
 import urllib.parse
+from collections.abc import AsyncGenerator, Callable
 from contextlib import AsyncExitStack
 from dataclasses import dataclass
 from pathlib import Path
-from typing import AsyncGenerator, Callable, Final, TypeAlias, cast
+from typing import Final, TypeAlias, cast

 import aioboto3
 from aiobotocore.session import ClientCreatorContext
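Reviewer note: since Python 3.9 the `collections.abc` generics are subscriptable directly and the corresponding `typing` aliases are deprecated, so moving `AsyncGenerator` and `Callable` out of `typing` is the idiomatic modernization. For illustration (alias names invented):

```python
from collections.abc import AsyncGenerator, Callable

# both ABCs support [...] directly since Python 3.9
BytesTransferredCallback = Callable[[int], None]
ChunkStream = AsyncGenerator[bytes, None]
```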
@@ -350,14 +351,14 @@ async def copy_file(
         :type src_file: SimcoreS3FileID
         :type dst_file: SimcoreS3FileID
         """
-        copy_options = dict(
-            CopySource={"Bucket": bucket, "Key": src_file},
-            Bucket=bucket,
-            Key=dst_file,
-            Config=TransferConfig(max_concurrency=self.transfer_max_concurrency),
-        )
+        copy_options = {
+            "CopySource": {"Bucket": bucket, "Key": src_file},
+            "Bucket": bucket,
+            "Key": dst_file,
+            "Config": TransferConfig(max_concurrency=self.transfer_max_concurrency),
+        }
         if bytes_transfered_cb:
-            copy_options |= dict(Callback=bytes_transfered_cb)
+            copy_options |= {"Callback": bytes_transfered_cb}
         await self.client.copy(**copy_options)
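The `copy_file` and `upload_file` changes are behavior-preserving style fixes: `dict(...)` calls become dict literals, and the optional callback is merged in with the Python 3.9+ `|=` dict-union operator. A minimal standalone sketch of the pattern (names invented):

```python
from typing import Any

def build_options(bucket: str, key: str, callback: Any | None = None) -> dict[str, Any]:
    # a dict literal keeps the boto3-style CamelCase keys as plain strings
    options: dict[str, Any] = {"Bucket": bucket, "Key": key}
    if callback is not None:
        options |= {"Callback": callback}  # in-place dict union (Python 3.9+)
    return options

assert "Callback" not in build_options("bucket", "key")
assert build_options("bucket", "key", callback=print)["Callback"] is print
```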

     @s3_exception_handler(_logger)
@@ -387,10 +388,11 @@ async def list_files(
     async def file_exists(self, bucket: S3BucketName, *, s3_object: str) -> bool:
         """Checks if an S3 object exists"""
         # SEE https://www.peterbe.com/plog/fastest-way-to-find-out-if-a-file-exists-in-s3
-        s3_objects, _ = await _list_objects_v2_paginated(
-            self.client, bucket, s3_object, max_total_items=EXPAND_DIR_MAX_ITEM_COUNT
+        response = await self.client.list_objects_v2(
+            Bucket=bucket,
+            Prefix=s3_object,
         )
-        return len(s3_objects) > 0
+        return any(obj["Key"] == s3_object for obj in response.get("Contents", []))
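Semantics worth flagging here: the old prefix-based listing returned true for any key that merely starts with `s3_object` (e.g. `file.txt.bak` when probing `file.txt`), while the new check requires an exact key match. A self-contained sketch of the new behavior with plain aioboto3 (bucket and key names invented):

```python
import asyncio

import aioboto3

async def s3_object_exists(bucket: str, key: str) -> bool:
    # Prefix narrows the listing; the equality test rejects keys that only
    # share the prefix (e.g. "data/file.txt.bak" when probing "data/file.txt")
    session = aioboto3.Session()
    async with session.client("s3") as client:
        response = await client.list_objects_v2(Bucket=bucket, Prefix=key)
        return any(obj["Key"] == key for obj in response.get("Contents", []))

if __name__ == "__main__":
    print(asyncio.run(s3_object_exists("my-bucket", "data/file.txt")))
```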

     @s3_exception_handler(_logger)
     async def upload_file(
@@ -406,13 +408,13 @@ async def upload_file(
         :type file: Path
         :type file_id: SimcoreS3FileID
         """
-        upload_options = dict(
-            Bucket=bucket,
-            Key=file_id,
-            Config=TransferConfig(max_concurrency=self.transfer_max_concurrency),
-        )
+        upload_options = {
+            "Bucket": bucket,
+            "Key": file_id,
+            "Config": TransferConfig(max_concurrency=self.transfer_max_concurrency),
+        }
         if bytes_transfered_cb:
-            upload_options |= dict(Callback=bytes_transfered_cb)
+            upload_options |= {"Callback": bytes_transfered_cb}
         await self.client.upload_file(f"{file}", **upload_options)

     @staticmethod
services/storage/src/simcore_service_storage/simcore_s3_dsm.py (75 changes: 60 additions & 15 deletions)
@@ -66,7 +66,11 @@
 from .s3_client import S3MetaData, StorageS3Client
 from .s3_utils import S3TransferDataCB, update_task_progress
 from .settings import Settings
-from .simcore_s3_dsm_utils import expand_directory, get_simcore_directory
+from .simcore_s3_dsm_utils import (
+    expand_directory,
+    get_directory_file_id,
+    get_simcore_directory,
+)
 from .utils import (
     convert_db_to_model,
     download_to_file_or_raise,
@@ -383,16 +387,47 @@ async def complete_file_upload(
     async def create_file_download_link(
         self, user_id: UserID, file_id: StorageFileID, link_type: LinkType
     ) -> AnyUrl:
-        async with self.engine.acquire() as conn:
+        """
+        Cases:
+        1. the file_id maps 1:1 to `file_meta_data` (e.g. it is not a file inside a directory)
+        2. the file_id represents a file inside a directory (its root path maps 1:1 to a `file_meta_data` entry defined as a directory)
+
+        3. Raises FileNotFoundError if the file does not exist
+        4. Raises FileAccessRightError if the user does not have access to the file
+        """
+
+        async def _ensure_access_rights(
+            conn: SAConnection, storage_file_id: StorageFileID
+        ) -> None:
             can: AccessRights | None = await get_file_access_rights(
-                conn, user_id, file_id
+                conn, user_id, storage_file_id
             )
             if not can.read:
                 # NOTE: this is tricky. A user with read access can download any data!
                 # If write permission would be required, then shared projects as views cannot
                 # recover data in nodes (e.g. jupyter cannot pull work data)
                 #
-                raise FileAccessRightError(access_right="read", file_id=file_id)
+                raise FileAccessRightError(access_right="read", file_id=storage_file_id)
+
+        async def _get_link(s3_file_id: SimcoreS3FileID) -> AnyUrl:
+            link: AnyUrl = parse_obj_as(
+                AnyUrl,
+                f"s3://{self.simcore_bucket_name}/{urllib.parse.quote(s3_file_id)}",
+            )
+            if link_type == LinkType.PRESIGNED:
+                link = await get_s3_client(
+                    self.app
+                ).create_single_presigned_download_link(
+                    self.simcore_bucket_name,
+                    s3_file_id,
+                    self.settings.STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS,
+                )
+
+            return link
+
+        async def _get_link_for_file_fmd(conn: SAConnection) -> AnyUrl:
+            # 1. the file_id maps 1:1 to `file_meta_data`
+            await _ensure_access_rights(conn, file_id)
+
             fmd = await db_file_meta_data.get(
                 conn, parse_obj_as(SimcoreS3FileID, file_id)
@@ -401,18 +436,28 @@ async def create_file_download_link(
             # try lazy update
             fmd = await self._update_database_from_storage(conn, fmd)

-            link: AnyUrl = parse_obj_as(
-                AnyUrl,
-                f"s3://{self.simcore_bucket_name}/{urllib.parse.quote(fmd.object_name)}",
-            )
-            if link_type == LinkType.PRESIGNED:
-                link = await get_s3_client(self.app).create_single_presigned_download_link(
-                    self.simcore_bucket_name,
-                    fmd.object_name,
-                    self.settings.STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS,
-                )
-
-            return link
+            return await _get_link(fmd.object_name)
+
+        async def _get_link_for_directory_fmd(
+            conn: SAConnection, directory_file_id: SimcoreS3FileID
+        ) -> AnyUrl:
+            # 2. the file_id represents a file inside a directory
+            await _ensure_access_rights(conn, directory_file_id)
+            if not await get_s3_client(self.app).file_exists(
+                self.simcore_bucket_name, s3_object=f"{file_id}"
+            ):
+                raise S3KeyNotFoundError(key=file_id, bucket=self.simcore_bucket_name)
+            return await _get_link(parse_obj_as(SimcoreS3FileID, file_id))
+
+        async with self.engine.acquire() as conn:
+            directory_file_id: SimcoreS3FileID | None = await get_directory_file_id(
+                conn, file_id
+            )
+            return (
+                await _get_link_for_directory_fmd(conn, directory_file_id)
+                if directory_file_id
+                else await _get_link_for_file_fmd(conn)
+            )
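The new flow bottoms out in one dispatch: `get_directory_file_id` decides whether `file_id` is a regular entry (case 1) or a file inside a registered directory (case 2), and both paths funnel into the shared `_get_link` helper. For readers unfamiliar with presigned links, a rough standalone sketch of what a presigned download URL amounts to in aioboto3 — an assumption about what `create_single_presigned_download_link` wraps, not a copy of it:

```python
import aioboto3

async def presigned_download_url(bucket: str, key: str, expires_secs: int) -> str:
    # the returned URL carries a time-limited signature, so its holder can GET
    # the object without having AWS credentials of their own
    session = aioboto3.Session()
    async with session.client("s3") as client:
        return await client.generate_presigned_url(
            "get_object",
            Params={"Bucket": bucket, "Key": key},
            ExpiresIn=expires_secs,
        )
```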

     async def delete_file(self, user_id: UserID, file_id: StorageFileID):
         async with self.engine.acquire() as conn, conn.begin():
services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py
@@ -1,9 +1,18 @@
+from contextlib import suppress
+
 from aiohttp import web
+from aiopg.sa.connection import SAConnection
 from models_library.api_schemas_storage import S3BucketName
-from models_library.projects_nodes_io import SimcoreS3DirectoryID, SimcoreS3FileID
+from models_library.projects_nodes_io import (
+    SimcoreS3DirectoryID,
+    SimcoreS3FileID,
+    StorageFileID,
+)
 from pydantic import ByteSize, NonNegativeInt, parse_obj_as
 from servicelib.utils import ensure_ends_with

 from . import db_file_meta_data
+from .exceptions import FileMetaDataNotFoundError
 from .models import FileMetaData, FileMetaDataAtDB
 from .s3 import get_s3_client
 from .s3_client import S3MetaData
@@ -52,9 +61,42 @@ async def expand_directory(
     return result


-def get_simcore_directory(file_id: SimcoreS3FileID) -> str:
+def get_simcore_directory(file_id: StorageFileID) -> str:
     try:
         directory_id = SimcoreS3DirectoryID.from_simcore_s3_object(file_id)
     except ValueError:
         return ""
     return f"{directory_id}"
+
+
+async def get_directory_file_id(
+    conn: SAConnection, file_id: StorageFileID
+) -> SimcoreS3FileID | None:
+    """
+    returns the id of the directory containing `file_id`, if that directory
+    has an entry in the `file_meta_data` table
+    """
+
+    async def _get_fmd(
+        conn: SAConnection, s3_file_id: StorageFileID
+    ) -> FileMetaDataAtDB | None:
+        with suppress(FileMetaDataNotFoundError):
+            return await db_file_meta_data.get(
+                conn, parse_obj_as(SimcoreS3FileID, s3_file_id)
+            )
+        return None
+
+    provided_file_id_fmd = await _get_fmd(conn, file_id)
+    if provided_file_id_fmd:
+        # a file_meta_data entry exists, so file_id is not a file inside a directory
+        return None
+
+    directory_file_id_str: str = get_simcore_directory(file_id)
+    if directory_file_id_str == "":
+        # could not extract a directory name from the provided path
+        return None
+
+    directory_file_id = parse_obj_as(SimcoreS3FileID, directory_file_id_str.rstrip("/"))
+    directory_file_id_fmd = await _get_fmd(conn, directory_file_id)
+
+    return directory_file_id if directory_file_id_fmd else None
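To make the lookup order concrete, here is a self-contained toy version of the same control flow — a `set` stands in for the `file_meta_data` table, and a deliberately crude split stands in for `SimcoreS3DirectoryID.from_simcore_s3_object`:

```python
def resolve_containing_directory(file_id: str, registered: set[str]) -> str | None:
    if file_id in registered:
        # the path itself is registered -> case 1, not a file inside a directory
        return None
    # crude stand-in: everything up to the last "/" is the candidate directory
    directory_id = file_id.rsplit("/", maxsplit=1)[0] if "/" in file_id else ""
    if not directory_id:
        return None
    return directory_id if directory_id in registered else None

# usage: only the directory entry "project/node/outputs" is registered
entries = {"project/node/outputs"}
assert resolve_containing_directory("project/node/outputs/data.csv", entries) == "project/node/outputs"
assert resolve_containing_directory("project/node/outputs", entries) is None
assert resolve_containing_directory("orphan.txt", entries) is None
```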
services/storage/src/simcore_service_storage/utils.py (1 change: 1 addition & 0 deletions)
@@ -57,6 +57,7 @@ async def download_to_file_or_raise(


 def is_file_entry_valid(file_metadata: FileMetaData | FileMetaDataAtDB) -> bool:
+    """checks if the file_metadata is valid"""
     return (
         file_metadata.entity_tag is not None
         and file_metadata.file_size > 0