From 00047a8be19bf58e1fdad14488f2eeebfd022415 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 25 Jun 2024 12:48:09 +0200 Subject: [PATCH 1/7] added log_directory_changes --- .../src/servicelib/file_utils.py | 29 ++++++++++++++- .../service-library/tests/test_file_utils.py | 37 ++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/packages/service-library/src/servicelib/file_utils.py b/packages/service-library/src/servicelib/file_utils.py index f05f35af329..89b3b51f5a0 100644 --- a/packages/service-library/src/servicelib/file_utils.py +++ b/packages/service-library/src/servicelib/file_utils.py @@ -1,8 +1,10 @@ import asyncio import hashlib import shutil +from contextlib import contextmanager +from logging import Logger from pathlib import Path -from typing import Final, Protocol +from typing import Final, Iterator, Protocol # https://docs.python.org/3/library/shutil.html#shutil.rmtree # https://docs.python.org/3/library/os.html#os.remove @@ -67,3 +69,28 @@ async def _eval_hash_async( hasher.update(chunk) digest = hasher.hexdigest() return f"{digest}" + + +@contextmanager +def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterator[None]: + before: set[str] = {f"{x.relative_to(path)}" for x in path.rglob("*")} + yield + after: set[str] = {f"{x.relative_to(path)}" for x in path.rglob("*")} + + added_elements = after - before + removed_elements = before - after + + if added_elements or removed_elements: + logger.log(log_level, "Files changes in path: '%s'", f"{path}") + if added_elements: + logger.log( + log_level, + "Files added:\n%s", + "\n".join([f"+ {x}" for x in sorted(added_elements)]), + ) + if removed_elements: + logger.log( + log_level, + "Files removed:\n%s", + "\n".join([f"- {x}" for x in sorted(removed_elements)]), + ) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index 454106c22b4..0af5e052b70 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -1,11 +1,14 @@ # pylint: disable=redefined-outer-name # pylint: disable=unused-argument +import logging from pathlib import Path import pytest from faker import Faker -from servicelib.file_utils import remove_directory +from servicelib.file_utils import log_directory_changes, remove_directory + +_logger = logging.getLogger(__name__) @pytest.fixture @@ -80,3 +83,35 @@ async def test_remove_not_existing_directory_rasing_error( await remove_directory( path=missing_path, only_children=only_children, ignore_errors=False ) + + +async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: Path): + # files were added + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "hoho").mkdir(parents=True, exist_ok=True) + assert "Files changes in path" in caplog.text + assert "Files added:" in caplog.text + + # files were removed + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + await remove_directory(path=some_dir) + assert "Files changes in path" in caplog.text + assert "Files removed:" in caplog.text + + # nothing changed + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + pass + assert caplog.text == "" + + # files added and removed + some_dir.mkdir(parents=True, exist_ok=True) + (some_dir / "som_other_file").touch() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "som_other_file").unlink() + (some_dir / "som_other_file_2").touch() + assert "Files changes in path" in caplog.text + assert "Files added:" in caplog.text + assert "Files removed:" in caplog.text From bc0459fb0d9c171dd8b113c1f0daf876c872d15e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 25 Jun 2024 12:49:25 +0200 Subject: [PATCH 2/7] added logging in dynamic-sidecar --- .../modules/long_running_tasks.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py index cc0b76c9197..592ce9c39c3 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py @@ -11,6 +11,7 @@ from models_library.rabbitmq_messages import ProgressType, SimcorePlatformStatus from pydantic import PositiveInt from servicelib.fastapi.long_running_tasks.server import TaskProgress +from servicelib.file_utils import log_directory_changes from servicelib.logging_utils import log_context from servicelib.progress_bar import ProgressBarData from servicelib.utils import logged_gather @@ -476,15 +477,18 @@ async def task_ports_inputs_pull( ), description="pulling inputs", ) as root_progress: - transferred_bytes = await nodeports.download_target_ports( - nodeports.PortTypeName.INPUTS, - mounted_volumes.disk_inputs_path, - port_keys=port_keys, - io_log_redirect_cb=functools.partial( - post_sidecar_log_message, app, log_level=logging.INFO - ), - progress_bar=root_progress, - ) + with log_directory_changes( + mounted_volumes.disk_inputs_path, _logger, logging.INFO + ): + transferred_bytes = await nodeports.download_target_ports( + nodeports.PortTypeName.INPUTS, + mounted_volumes.disk_inputs_path, + port_keys=port_keys, + io_log_redirect_cb=functools.partial( + post_sidecar_log_message, app, log_level=logging.INFO + ), + progress_bar=root_progress, + ) await post_sidecar_log_message( app, "Finished pulling inputs", log_level=logging.INFO ) From 0cf16f5a0328c056e295cbb2189de43811647a56 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 25 Jun 2024 15:03:29 +0200 Subject: [PATCH 3/7] also tracking content changes --- .../src/servicelib/file_utils.py | 32 +++++++++++++++---- .../service-library/tests/test_file_utils.py | 16 ++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/packages/service-library/src/servicelib/file_utils.py b/packages/service-library/src/servicelib/file_utils.py index 89b3b51f5a0..693f7eaed39 100644 --- a/packages/service-library/src/servicelib/file_utils.py +++ b/packages/service-library/src/servicelib/file_utils.py @@ -62,7 +62,7 @@ async def create_sha256_checksum( async def _eval_hash_async( async_stream: AsyncStream, - hasher: "hashlib._Hash", # noqa: SLF001 + hasher: "hashlib._Hash", chunk_size: ByteSize, ) -> str: while chunk := await async_stream.read(chunk_size): @@ -71,16 +71,30 @@ async def _eval_hash_async( return f"{digest}" +def _get_file_properties(path: Path) -> tuple[float, int]: + stats = path.stat() + return stats.st_mtime, stats.st_size + + +def _get_directory_snapshot(path: Path) -> dict[str, tuple[float, int]]: + return {f"{x.relative_to(path)}": _get_file_properties(x) for x in path.rglob("*")} + + @contextmanager def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterator[None]: - before: set[str] = {f"{x.relative_to(path)}" for x in path.rglob("*")} + before: dict[str, tuple[float, int]] = _get_directory_snapshot(path) yield - after: set[str] = {f"{x.relative_to(path)}" for x in path.rglob("*")} + after: dict[str, tuple[float, int]] = _get_directory_snapshot(path) - added_elements = after - before - removed_elements = before - after + after_keys: set[str] = set(after.keys()) + before_keys: set[str] = set(before.keys()) + common_keys = before_keys & after_keys - if added_elements or removed_elements: + added_elements = after_keys - before_keys + removed_elements = before_keys - after_keys + content_changed_elements = {x for x in common_keys if before[x] != after[x]} + + if added_elements or removed_elements or content_changed_elements: logger.log(log_level, "Files changes in path: '%s'", f"{path}") if added_elements: logger.log( @@ -94,3 +108,9 @@ def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterato "Files removed:\n%s", "\n".join([f"- {x}" for x in sorted(removed_elements)]), ) + if content_changed_elements: + logger.log( + log_level, + "Files content changed:\n%s", + "\n".join([f"* {x}" for x in sorted(content_changed_elements)]), + ) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index 0af5e052b70..e1c0d6250f7 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -92,6 +92,8 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: (some_dir / "hoho").mkdir(parents=True, exist_ok=True) assert "Files changes in path" in caplog.text assert "Files added:" in caplog.text + assert "Files removed:" not in caplog.text + assert "Files content changed" not in caplog.text # files were removed caplog.clear() @@ -99,6 +101,8 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: await remove_directory(path=some_dir) assert "Files changes in path" in caplog.text assert "Files removed:" in caplog.text + assert "Files added:" not in caplog.text + assert "Files content changed" not in caplog.text # nothing changed caplog.clear() @@ -107,6 +111,7 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: assert caplog.text == "" # files added and removed + caplog.clear() some_dir.mkdir(parents=True, exist_ok=True) (some_dir / "som_other_file").touch() with log_directory_changes(some_dir, _logger, logging.ERROR): @@ -115,3 +120,14 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: assert "Files changes in path" in caplog.text assert "Files added:" in caplog.text assert "Files removed:" in caplog.text + assert "Files content changed" not in caplog.text + + # file content changed + caplog.clear() + (some_dir / "file_to_change").touch() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "file_to_change").write_text("ab") + assert "Files changes in path" in caplog.text + assert "Files added:" not in caplog.text + assert "Files removed:" not in caplog.text + assert "Files content changed" in caplog.text From c5a2f8adca40be21113f4e7ea0b7665d3c697c87 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 26 Jun 2024 14:35:01 +0200 Subject: [PATCH 4/7] repharse --- .../service-library/src/servicelib/file_utils.py | 10 +++++++--- .../service-library/tests/test_file_utils.py | 16 ++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/service-library/src/servicelib/file_utils.py b/packages/service-library/src/servicelib/file_utils.py index 693f7eaed39..2224afa6569 100644 --- a/packages/service-library/src/servicelib/file_utils.py +++ b/packages/service-library/src/servicelib/file_utils.py @@ -77,7 +77,11 @@ def _get_file_properties(path: Path) -> tuple[float, int]: def _get_directory_snapshot(path: Path) -> dict[str, tuple[float, int]]: - return {f"{x.relative_to(path)}": _get_file_properties(x) for x in path.rglob("*")} + return { + f"{x.relative_to(path)}": _get_file_properties(x) + for x in path.rglob("*") + if x.is_file() + } @contextmanager @@ -95,7 +99,7 @@ def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterato content_changed_elements = {x for x in common_keys if before[x] != after[x]} if added_elements or removed_elements or content_changed_elements: - logger.log(log_level, "Files changes in path: '%s'", f"{path}") + logger.log(log_level, "File changes in path: '%s'", f"{path}") if added_elements: logger.log( log_level, @@ -111,6 +115,6 @@ def log_directory_changes(path: Path, logger: Logger, log_level: int) -> Iterato if content_changed_elements: logger.log( log_level, - "Files content changed:\n%s", + "File content changed:\n%s", "\n".join([f"* {x}" for x in sorted(content_changed_elements)]), ) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index e1c0d6250f7..44b511b231d 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -90,19 +90,19 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: caplog.clear() with log_directory_changes(some_dir, _logger, logging.ERROR): (some_dir / "hoho").mkdir(parents=True, exist_ok=True) - assert "Files changes in path" in caplog.text + assert "File changes in path" in caplog.text assert "Files added:" in caplog.text assert "Files removed:" not in caplog.text - assert "Files content changed" not in caplog.text + assert "File content changed" not in caplog.text # files were removed caplog.clear() with log_directory_changes(some_dir, _logger, logging.ERROR): await remove_directory(path=some_dir) - assert "Files changes in path" in caplog.text + assert "File changes in path" in caplog.text assert "Files removed:" in caplog.text assert "Files added:" not in caplog.text - assert "Files content changed" not in caplog.text + assert "File content changed" not in caplog.text # nothing changed caplog.clear() @@ -117,17 +117,17 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: with log_directory_changes(some_dir, _logger, logging.ERROR): (some_dir / "som_other_file").unlink() (some_dir / "som_other_file_2").touch() - assert "Files changes in path" in caplog.text + assert "File changes in path" in caplog.text assert "Files added:" in caplog.text assert "Files removed:" in caplog.text - assert "Files content changed" not in caplog.text + assert "File content changed" not in caplog.text # file content changed caplog.clear() (some_dir / "file_to_change").touch() with log_directory_changes(some_dir, _logger, logging.ERROR): (some_dir / "file_to_change").write_text("ab") - assert "Files changes in path" in caplog.text + assert "File changes in path" in caplog.text assert "Files added:" not in caplog.text assert "Files removed:" not in caplog.text - assert "Files content changed" in caplog.text + assert "File content changed" in caplog.text From 90682d6295501f84bde13d18296b5cedfa4abc30 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 26 Jun 2024 14:38:23 +0200 Subject: [PATCH 5/7] updated tests --- packages/service-library/tests/test_file_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index 44b511b231d..5d3a5fa6deb 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -89,7 +89,7 @@ async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: # files were added caplog.clear() with log_directory_changes(some_dir, _logger, logging.ERROR): - (some_dir / "hoho").mkdir(parents=True, exist_ok=True) + (some_dir / "hoho").touch() assert "File changes in path" in caplog.text assert "Files added:" in caplog.text assert "Files removed:" not in caplog.text From 0e8451a49ba5861eb59c05c6c6885ea7f10ee2c8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 26 Jun 2024 14:39:31 +0200 Subject: [PATCH 6/7] directories don't trigger changes --- packages/service-library/tests/test_file_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/service-library/tests/test_file_utils.py b/packages/service-library/tests/test_file_utils.py index 5d3a5fa6deb..b5feff78603 100644 --- a/packages/service-library/tests/test_file_utils.py +++ b/packages/service-library/tests/test_file_utils.py @@ -86,6 +86,15 @@ async def test_remove_not_existing_directory_rasing_error( async def test_log_directory_changes(caplog: pytest.LogCaptureFixture, some_dir: Path): + # directory cretion triggers no changes + caplog.clear() + with log_directory_changes(some_dir, _logger, logging.ERROR): + (some_dir / "a-dir").mkdir(parents=True, exist_ok=True) + assert "File changes in path" not in caplog.text + assert "Files added:" not in caplog.text + assert "Files removed:" not in caplog.text + assert "File content changed" not in caplog.text + # files were added caplog.clear() with log_directory_changes(some_dir, _logger, logging.ERROR): From b3c31946a71983dfc91326808595b48d0197ab47 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 27 Jun 2024 07:27:34 +0200 Subject: [PATCH 7/7] refactor --- packages/service-library/src/servicelib/file_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/service-library/src/servicelib/file_utils.py b/packages/service-library/src/servicelib/file_utils.py index 2224afa6569..c90468cba2a 100644 --- a/packages/service-library/src/servicelib/file_utils.py +++ b/packages/service-library/src/servicelib/file_utils.py @@ -78,9 +78,9 @@ def _get_file_properties(path: Path) -> tuple[float, int]: def _get_directory_snapshot(path: Path) -> dict[str, tuple[float, int]]: return { - f"{x.relative_to(path)}": _get_file_properties(x) - for x in path.rglob("*") - if x.is_file() + f"{p.relative_to(path)}": _get_file_properties(p) + for p in path.rglob("*") + if p.is_file() }