1 | | -# pylint:disable=unused-argument
2 | | -# pylint:disable=redefined-outer-name
3 | | -# pylint:disable=no-name-in-module
| 1 | +# pylint:disable=redefined-outer-name,unused-argument
4 | 2 |
5 | 3 | import asyncio
6 | 4 | import hashlib
7 | 5 | import os
8 | 6 | import random
9 | 7 | import secrets
10 | 8 | import string
11 | | -import sys
| 9 | +import tempfile
12 | 10 | import uuid
13 | 11 | from concurrent.futures import ProcessPoolExecutor
14 | 12 | from pathlib import Path
15 | | -from typing import Dict, Iterator, List, Set
| 13 | +from typing import Dict, Iterator, List, Set, Tuple
16 | 14 |
17 | 15 | import pytest
18 | 16 | from simcore_service_webserver.exporter.archiving import (
25 | 23 |
26 | 24 |
27 | 25 | @pytest.fixture
28 | | -async def monkey_patch_asyncio_subporcess(loop, mocker):
29 | | -    # TODO: The below bug is not allowing me to fully test,
30 | | -    # mocking and waiting for an update
31 | | -    # https://bugs.python.org/issue35621
32 | | -    # this issue was patched in 3.8, no need
33 | | -    if sys.version_info.major == 3 and sys.version_info.minor >= 8:
34 | | -        raise RuntimeError(
35 | | -            "Issue no longer present in this version of python, "
36 | | -            "please remote this mock on python >= 3.8"
37 | | -        )
38 | | -
39 | | -    import subprocess
40 | | -
41 | | -    async def create_subprocess_exec(*command, **extra_params):
42 | | -        class MockResponse:
43 | | -            def __init__(self, command, **kwargs):
44 | | -                self.proc = subprocess.Popen(command, **extra_params)
45 | | -
46 | | -            async def communicate(self):
47 | | -                return self.proc.communicate()
48 | | -
49 | | -            @property
50 | | -            def returncode(self):
51 | | -                return self.proc.returncode
52 | | -
53 | | -        mock_response = MockResponse(command, **extra_params)
| 26 | +def temp_dir(tmpdir) -> Path:
| 27 | +    # cast to Path object
| 28 | +    return Path(tmpdir)
54 | 29 |
55 | | -        return mock_response
56 | 30 |
57 | | -    mocker.patch("asyncio.create_subprocess_exec", side_effect=create_subprocess_exec)
| 31 | +@pytest.fixture
| 32 | +def temp_dir2() -> Iterator[Path]:
| 33 | +    with tempfile.TemporaryDirectory() as temp_dir:
| 34 | +        temp_dir_path = Path(temp_dir)
| 35 | +        extract_dir_path = temp_dir_path / "extract_dir"
| 36 | +        extract_dir_path.mkdir(parents=True, exist_ok=True)
| 37 | +        yield extract_dir_path
58 | 38 |
59 | 39 |
60 | 40 | @pytest.fixture
61 | | -def temp_dir(tmpdir) -> Path:
62 | | -    # Casts https://docs.pytest.org/en/stable/tmpdir.html#the-tmpdir-fixture to Path
63 | | -    return Path(tmpdir)
| 41 | +def temp_file() -> Iterator[Path]:
| 42 | +    file_path = Path("/") / f"tmp/{next(tempfile._get_candidate_names())}"
| 43 | +    file_path.write_text("test_data")
| 44 | +    yield file_path
| 45 | +    file_path.unlink()
64 | 46 |
65 | 47 |
66 | 48 | @pytest.fixture
67 | | -def project_uuid() -> str:
| 49 | +def project_uuid():
68 | 50 |     return str(uuid.uuid4())
69 | 51 |
70 | 52 |
71 | 53 | @pytest.fixture
72 | | -def dir_with_random_content() -> Path:
| 54 | +def dir_with_random_content() -> Iterator[Path]:
73 | 55 |     def random_string(length: int) -> str:
74 | 56 |         return "".join(secrets.choice(string.ascii_letters) for i in range(length))
75 | 57 |
@@ -101,7 +83,7 @@ def make_subdirectories_with_content(
101 | 83 |         )
102 | 84 |
103 | 85 |     def get_dirs_and_subdris_in_path(path_to_scan: Path) -> Iterator[Path]:
104 | | -        return [path for path in path_to_scan.rglob("*") if path.is_dir()]
| 86 | +        return (path for path in path_to_scan.rglob("*") if path.is_dir())
105 | 87 |
106 | 88 |     with tempfile.TemporaryDirectory() as temp_dir:
107 | 89 |         temp_dir_path = Path(temp_dir)
@@ -154,6 +136,80 @@ def temp_dir_to_compress_with_too_many_targets(temp_dir, project_uuid) -> Path:
154 | 136 |     return nested_dir
155 | 137 |
156 | 138 |
| 139 | +def strip_directory_from_path(input_path: Path, to_strip: Path) -> Path:
| 140 | +    _to_strip = f"{str(to_strip)}/"
| 141 | +    return Path(str(input_path).replace(_to_strip, ""))
| 142 | +
| 143 | +
| 144 | +def get_all_files_in_dir(dir_path: Path) -> Set[Path]:
| 145 | +    return {
| 146 | +        strip_directory_from_path(x, dir_path)
| 147 | +        for x in dir_path.rglob("*")
| 148 | +        if x.is_file()
| 149 | +    }
| 150 | +
| 151 | +
| 152 | +def _compute_hash(file_path: Path) -> Tuple[Path, str]:
| 153 | +    with open(file_path, "rb") as file_to_hash:
| 154 | +        file_hash = hashlib.md5()
| 155 | +        chunk = file_to_hash.read(8192)
| 156 | +        while chunk:
| 157 | +            file_hash.update(chunk)
| 158 | +            chunk = file_to_hash.read(8192)
| 159 | +
| 160 | +    return file_path, file_hash.hexdigest()
| 161 | +
| 162 | +
| 163 | +async def compute_hashes(file_paths: List[Path]) -> Dict[Path, str]:
| 164 | +    """Given a list of files, computes their hashes on a process pool"""
| 165 | +
| 166 | +    loop = asyncio.get_event_loop()
| 167 | +
| 168 | +    with ProcessPoolExecutor() as process_pool_executor:
| 169 | +        tasks = [
| 170 | +            loop.run_in_executor(process_pool_executor, _compute_hash, file_path)
| 171 | +            for file_path in file_paths
| 172 | +        ]
| 173 | +        return {k: v for k, v in await asyncio.gather(*tasks)}
| 174 | +
| 175 | +
| 176 | +def full_file_path_from_dir_and_subdirs(dir_path: Path) -> List[Path]:
| 177 | +    return [x for x in dir_path.rglob("*") if x.is_file()]
| 178 | +
| 179 | +
| 180 | +async def assert_same_directory_content(
| 181 | +    dir_to_compress: Path, output_dir: Path
| 182 | +) -> None:
| 183 | +    input_set = get_all_files_in_dir(dir_to_compress)
| 184 | +    output_set = get_all_files_in_dir(output_dir)
| 185 | +    assert (
| 186 | +        input_set == output_set
| 187 | +    ), f"The following files are missing: {input_set - output_set}"
| 188 | +
| 189 | +    # compute the hashes for dir_to_compress and map them in a dict
| 190 | +    # keyed by the file path relative to the directory root
| 191 | +    dir_to_compress_hashes = {
| 192 | +        strip_directory_from_path(k, dir_to_compress): v
| 193 | +        for k, v in (
| 194 | +            await compute_hashes(full_file_path_from_dir_and_subdirs(dir_to_compress))
| 195 | +        ).items()
| 196 | +    }
| 197 | +
| 198 | +    # compute the hashes for output_dir and map them in a dict
| 199 | +    # keyed by the file path relative to the directory root
| 200 | +    output_dir_hashes = {
| 201 | +        strip_directory_from_path(k, output_dir): v
| 202 | +        for k, v in (
| 203 | +            await compute_hashes(full_file_path_from_dir_and_subdirs(output_dir))
| 204 | +        ).items()
| 205 | +    }
| 206 | +
| 207 | +    # finally, check that the hashes map 1 to 1 in order to verify
| 208 | +    # that the compress/decompress round trip worked correctly
| 209 | +    for key in dir_to_compress_hashes:
| 210 | +        assert dir_to_compress_hashes[key] == output_dir_hashes[key]
| 211 | +
| 212 | +
157 | 213 | def test_validate_osparc_file_name_ok():
158 | 214 |     algorithm, digest_sum = validate_osparc_import_name(
159 | 215 |         "v1#SHA256=80e69a0973e15f4a9c3c180d00a39ee0b0dfafe43356f867983e1180e9b5a892.osparc"