From ddceb1192d4b6f82ad2c477e836cf746832f22d1 Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Fri, 17 Dec 2021 21:28:12 -0800 Subject: [PATCH 01/12] feat(idempontency): add support for python dataclasses --- .../utilities/idempotency/persistence/base.py | 9 ++++-- .../idempotency/test_idempotency.py | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index 8f2b30d289a..b4ccc874810 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -1,7 +1,6 @@ """ Persistence layers supporting idempotency """ - import datetime import hashlib import json @@ -226,7 +225,13 @@ def _generate_hash(self, data: Any) -> str: Hashed representation of the provided data """ - data = getattr(data, "raw_event", data) # could be a data class depending on decorator order + if hasattr(data, "__dataclass_fields__"): + import dataclasses + + data = dataclasses.asdict(data) + else: + data = getattr(data, "raw_event", data) + hashed_data = self.hash_function(json.dumps(data, cls=Encoder, sort_keys=True).encode()) return hashed_data.hexdigest() diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index a8cf652d8a0..e8c08e237ed 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -1057,3 +1057,33 @@ def two(data): assert one(data=mock_event) == "one" assert two(data=mock_event) == "two" assert len(persistence_store.table.method_calls) == 4 + + +def test_idempotent_function_dataclasses(): + try: + # Scenario idempotent_function should allow for python dataclasses + from dataclasses import asdict, dataclass + + @dataclass + class Foo: + name: str + + mock_event = Foo(name="Bar") + persistence_layer = 
MockPersistenceLayer( + "test-func.record_handler#" + hashlib.md5(serialize(asdict(mock_event)).encode()).hexdigest() + ) + expected_result = {"message": "Foo"} + + @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") + def record_handler(record): + assert isinstance(record, Foo) + return expected_result + + # WHEN calling the function + result = record_handler(record=mock_event) + # THEN we expect the function to execute successfully + assert result == expected_result + + except ModuleNotFoundError: + # Python 3.6 + pass From 6e064a1b08faf60a92aa5623567aba50ad77a757 Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Fri, 17 Dec 2021 23:41:59 -0800 Subject: [PATCH 02/12] refactor: add _prepare_data for json.dumps --- .../utilities/idempotency/persistence/base.py | 24 ++++++++----- .../idempotency/test_idempotency.py | 36 ++++++++++--------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index b4ccc874810..b968e30f564 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -104,6 +104,21 @@ def response_json_as_dict(self) -> dict: return json.loads(self.response_data) +def _prepare_data(data: Any) -> Any: + # Convert pydantic as a dict + _dict = getattr(data, "dict", None) + if callable(_dict): + return _dict() + + # Convert dataclasses as a dict + if hasattr(data, "__dataclass_fields__"): + import dataclasses + + return dataclasses.asdict(data) + + return getattr(data, "raw_event", data) + + class BasePersistenceLayer(ABC): """ Abstract Base Class for Idempotency persistence layer. 
@@ -225,14 +240,7 @@ def _generate_hash(self, data: Any) -> str: Hashed representation of the provided data """ - if hasattr(data, "__dataclass_fields__"): - import dataclasses - - data = dataclasses.asdict(data) - else: - data = getattr(data, "raw_event", data) - - hashed_data = self.hash_function(json.dumps(data, cls=Encoder, sort_keys=True).encode()) + hashed_data = self.hash_function(json.dumps(_prepare_data(data), cls=Encoder, sort_keys=True).encode()) return hashed_data.hexdigest() def _validate_payload(self, data: Dict[str, Any], data_record: DataRecord) -> None: diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index e8c08e237ed..7c2e398ed79 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -20,7 +20,7 @@ IdempotencyValidationError, ) from aws_lambda_powertools.utilities.idempotency.idempotency import idempotent, idempotent_function -from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord +from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord, _prepare_data from aws_lambda_powertools.utilities.validation import envelopes, validator from tests.functional.idempotency.conftest import serialize from tests.functional.utils import load_event @@ -1061,29 +1061,31 @@ def two(data): def test_idempotent_function_dataclasses(): try: - # Scenario idempotent_function should allow for python dataclasses + # Scenario _prepare_data should allow for python dataclasses from dataclasses import asdict, dataclass @dataclass class Foo: name: str - mock_event = Foo(name="Bar") - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(asdict(mock_event)).encode()).hexdigest() - ) - expected_result = {"message": "Foo"} - - @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") 
- def record_handler(record): - assert isinstance(record, Foo) - return expected_result - - # WHEN calling the function - result = record_handler(record=mock_event) - # THEN we expect the function to execute successfully - assert result == expected_result + data = Foo(name="Bar") + as_dict = _prepare_data(data) + assert asdict(data) == as_dict + assert as_dict == {"name": "Bar"} except ModuleNotFoundError: # Python 3.6 pass + + +def test_idempotent_function_pydantic(): + # Scenario _prepare_data should allow for pydantic + from pydantic import BaseModel + + class Foo(BaseModel): + name: str + + data = Foo(name="Bar") + as_dict = _prepare_data(data) + assert as_dict == data.dict() + assert as_dict == {"name": "Bar"} From bf15b0b2826f50d4971f5fa1173952df552ae41c Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Sat, 18 Dec 2021 08:39:46 -0800 Subject: [PATCH 03/12] tests: make sure other types are not converted --- .../utilities/idempotency/persistence/base.py | 7 ++----- .../idempotency/test_idempotency.py | 21 ++++++++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index b968e30f564..e42380c0f1b 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -105,12 +105,9 @@ def response_json_as_dict(self) -> dict: def _prepare_data(data: Any) -> Any: - # Convert pydantic as a dict - _dict = getattr(data, "dict", None) - if callable(_dict): - return _dict() + if callable(getattr(data, "dict", None)): + return data.dict() - # Convert dataclasses as a dict if hasattr(data, "__dataclass_fields__"): import dataclasses diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index 7c2e398ed79..e01c664f99b 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ 
b/tests/functional/idempotency/test_idempotency.py @@ -1061,31 +1061,38 @@ def two(data): def test_idempotent_function_dataclasses(): try: - # Scenario _prepare_data should allow for python dataclasses + # Scenario _prepare_data should convert a python dataclasses to a dict from dataclasses import asdict, dataclass @dataclass class Foo: name: str + expected_result = {"name": "Bar"} data = Foo(name="Bar") as_dict = _prepare_data(data) - assert asdict(data) == as_dict - assert as_dict == {"name": "Bar"} + assert as_dict == asdict(data) + assert as_dict == expected_result except ModuleNotFoundError: - # Python 3.6 - pass + pass # Python 3.6 def test_idempotent_function_pydantic(): - # Scenario _prepare_data should allow for pydantic + # Scenario _prepare_data should convert a pydantic to a dict from pydantic import BaseModel class Foo(BaseModel): name: str + expected_result = {"name": "Bar"} data = Foo(name="Bar") as_dict = _prepare_data(data) assert as_dict == data.dict() - assert as_dict == {"name": "Bar"} + assert as_dict == expected_result + + +@pytest.mark.parametrize("data", [None, "foo", ["foo"], 1, True, {}]) +def test_idempotent_function_other(data): + # All other data types should be left as is + assert _prepare_data(data) == data From a1e26d51f3a85ec8954ed4988fb24bce2935e9a2 Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Sat, 18 Dec 2021 17:47:49 -0800 Subject: [PATCH 04/12] chore: add docs --- aws_lambda_powertools/utilities/idempotency/persistence/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index e42380c0f1b..539152fecbe 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -105,6 +105,8 @@ def response_json_as_dict(self) -> dict: def _prepare_data(data: Any) -> Any: + """Prepare data for json serialization. 
+ This will convert dataclasses, pydantic models or event source data classes to a dict.""" if callable(getattr(data, "dict", None)): return data.dict() From 64965d45bcb4d072c0797bd28991689ffcd5afb5 Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Sat, 18 Dec 2021 17:57:26 -0800 Subject: [PATCH 05/12] From e9535a5b7d71fe24c60d1f4580f41ba74ca7efdb Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Sun, 19 Dec 2021 19:31:50 -0800 Subject: [PATCH 06/12] chore: code review changes --- tests/functional/idempotency/test_idempotency.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index e01c664f99b..b0b93b4eeb6 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -8,6 +8,7 @@ import jmespath import pytest from botocore import stub +from pydantic import BaseModel from aws_lambda_powertools.utilities.data_classes import APIGatewayProxyEventV2, event_source from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer, IdempotencyConfig @@ -1080,8 +1081,6 @@ class Foo: def test_idempotent_function_pydantic(): # Scenario _prepare_data should convert a pydantic to a dict - from pydantic import BaseModel - class Foo(BaseModel): name: str From 6430f8d86833747a28742defcd9e96d1ad7d820e Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Tue, 21 Dec 2021 21:29:39 -0800 Subject: [PATCH 07/12] chore: slight refactor --- .../utilities/idempotency/persistence/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index 539152fecbe..7c4233cb03e 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -107,14 +107,14 @@ def 
response_json_as_dict(self) -> dict: def _prepare_data(data: Any) -> Any: """Prepare data for json serialization. This will convert dataclasses, pydantic models or event source data classes to a dict.""" - if callable(getattr(data, "dict", None)): - return data.dict() - if hasattr(data, "__dataclass_fields__"): import dataclasses return dataclasses.asdict(data) + if callable(getattr(data, "dict", None)): + return data.dict() + return getattr(data, "raw_event", data) From 4f69010b5d4c126c76cf63d2fff449a51ce25b26 Mon Sep 17 00:00:00 2001 From: Michael Brewer Date: Tue, 21 Dec 2021 21:51:13 -0800 Subject: [PATCH 08/12] refactor: do the _prepare_data as soon as possible --- .../utilities/idempotency/base.py | 16 ++++++++++- .../utilities/idempotency/persistence/base.py | 16 +---------- .../idempotency/test_idempotency.py | 27 ++++++++++--------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/base.py b/aws_lambda_powertools/utilities/idempotency/base.py index 7dee94fc356..b08a449e9b0 100644 --- a/aws_lambda_powertools/utilities/idempotency/base.py +++ b/aws_lambda_powertools/utilities/idempotency/base.py @@ -21,6 +21,20 @@ logger = logging.getLogger(__name__) +def _prepare_data(data: Any) -> Any: + """Prepare data for json serialization. + This will convert dataclasses, pydantic models or event source data classes to a dict.""" + if hasattr(data, "__dataclass_fields__"): + import dataclasses + + return dataclasses.asdict(data) + + if callable(getattr(data, "dict", None)): + return data.dict() + + return getattr(data, "raw_event", data) + + class IdempotencyHandler: """ Base class to orchestrate calls to persistence layer. 
@@ -52,7 +66,7 @@ def __init__( Function keyword arguments """ self.function = function - self.data = function_payload + self.data = _prepare_data(function_payload) self.fn_args = function_args self.fn_kwargs = function_kwargs diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index 7c4233cb03e..b07662e6432 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -104,20 +104,6 @@ def response_json_as_dict(self) -> dict: return json.loads(self.response_data) -def _prepare_data(data: Any) -> Any: - """Prepare data for json serialization. - This will convert dataclasses, pydantic models or event source data classes to a dict.""" - if hasattr(data, "__dataclass_fields__"): - import dataclasses - - return dataclasses.asdict(data) - - if callable(getattr(data, "dict", None)): - return data.dict() - - return getattr(data, "raw_event", data) - - class BasePersistenceLayer(ABC): """ Abstract Base Class for Idempotency persistence layer. 
@@ -239,7 +225,7 @@ def _generate_hash(self, data: Any) -> str: Hashed representation of the provided data """ - hashed_data = self.hash_function(json.dumps(_prepare_data(data), cls=Encoder, sort_keys=True).encode()) + hashed_data = self.hash_function(json.dumps(data, cls=Encoder, sort_keys=True).encode()) return hashed_data.hexdigest() def _validate_payload(self, data: Dict[str, Any], data_record: DataRecord) -> None: diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index c397b1ee785..32938aff2c9 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -12,6 +12,7 @@ from aws_lambda_powertools.utilities.data_classes import APIGatewayProxyEventV2, event_source from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer, IdempotencyConfig +from aws_lambda_powertools.utilities.idempotency.base import _prepare_data from aws_lambda_powertools.utilities.idempotency.exceptions import ( IdempotencyAlreadyInProgressError, IdempotencyInconsistentStateError, @@ -21,7 +22,7 @@ IdempotencyValidationError, ) from aws_lambda_powertools.utilities.idempotency.idempotency import idempotent, idempotent_function -from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord, _prepare_data +from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord from aws_lambda_powertools.utilities.validation import envelopes, validator from tests.functional.idempotency.conftest import serialize from tests.functional.utils import load_event @@ -1060,6 +1061,18 @@ def two(data): assert len(persistence_store.table.method_calls) == 4 +def test_invalid_dynamodb_persistence_layer(): + # Scenario constructing a DynamoDBPersistenceLayer with a key_attr matching sort_key_attr should fail + with pytest.raises(ValueError) as ve: + DynamoDBPersistenceLayer( + 
table_name="Foo", + key_attr="id", + sort_key_attr="id", + ) + # and raise a ValueError + assert str(ve.value) == "key_attr [id] and sort_key_attr [id] cannot be the same!" + + def test_idempotent_function_dataclasses(): try: # Scenario _prepare_data should convert a python dataclasses to a dict @@ -1095,15 +1108,3 @@ class Foo(BaseModel): def test_idempotent_function_other(data): # All other data types should be left as is assert _prepare_data(data) == data - - -def test_invalid_dynamodb_persistence_layer(): - # Scenario constructing a DynamoDBPersistenceLayer with a key_attr matching sort_key_attr should fail - with pytest.raises(ValueError) as ve: - DynamoDBPersistenceLayer( - table_name="Foo", - key_attr="id", - sort_key_attr="id", - ) - # and raise a ValueError - assert str(ve.value) == "key_attr [id] and sort_key_attr [id] cannot be the same!" From 91726c10aa0c1136f61c40320af4127050a48add Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 22 Dec 2021 11:05:50 +0100 Subject: [PATCH 09/12] chore: test dataclasses/pydantic with jmespath --- .../utilities/idempotency/base.py | 5 +- .../idempotency/test_idempotency.py | 86 ++++++++++++++++--- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/aws_lambda_powertools/utilities/idempotency/base.py b/aws_lambda_powertools/utilities/idempotency/base.py index b08a449e9b0..dddc36b426d 100644 --- a/aws_lambda_powertools/utilities/idempotency/base.py +++ b/aws_lambda_powertools/utilities/idempotency/base.py @@ -23,7 +23,10 @@ def _prepare_data(data: Any) -> Any: """Prepare data for json serialization. - This will convert dataclasses, pydantic models or event source data classes to a dict.""" + + We will convert Python dataclasses, pydantic models or event source data classes to a dict, + otherwise return data as-is. 
+ """ if hasattr(data, "__dataclass_fields__"): import dataclasses diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index 32938aff2c9..34d1ffdd296 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -30,6 +30,13 @@ TABLE_NAME = "TEST_TABLE" +def get_dataclasses_lib(): + """Python 3.6 doesn't support dataclasses natively""" + import dataclasses + + return dataclasses + + # Using parametrize to run test twice, with two separate instances of persistence store. One instance with caching # enabled, and one without. @pytest.mark.parametrize("idempotency_config", [{"use_local_cache": False}, {"use_local_cache": True}], indirect=True) @@ -1073,23 +1080,20 @@ def test_invalid_dynamodb_persistence_layer(): assert str(ve.value) == "key_attr [id] and sort_key_attr [id] cannot be the same!" +@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") def test_idempotent_function_dataclasses(): - try: - # Scenario _prepare_data should convert a python dataclasses to a dict - from dataclasses import asdict, dataclass + # Scenario _prepare_data should convert a python dataclasses to a dict + dataclasses = get_dataclasses_lib() - @dataclass - class Foo: - name: str - - expected_result = {"name": "Bar"} - data = Foo(name="Bar") - as_dict = _prepare_data(data) - assert as_dict == asdict(data) - assert as_dict == expected_result + @dataclasses.dataclass + class Foo: + name: str - except ModuleNotFoundError: - pass # Python 3.6 + expected_result = {"name": "Bar"} + data = Foo(name="Bar") + as_dict = _prepare_data(data) + assert as_dict == dataclasses.asdict(data) + assert as_dict == expected_result def test_idempotent_function_pydantic(): @@ -1108,3 +1112,57 @@ class Foo(BaseModel): def test_idempotent_function_other(data): # All other data types should be left as is assert _prepare_data(data) == data + + 
+@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") +def test_idempotent_function_dataclass_with_jmespath(): + # GIVEN + dataclasses = get_dataclasses_lib() + config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) + mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} + persistence_layer = MockPersistenceLayer( + expected_idempotency_key="test-func.collect_payment#" + + hashlib.md5(serialize(mock_event["transaction_id"]).encode()).hexdigest() + ) + + @dataclasses.dataclass + class Payment: + customer_id: str + transaction_id: str + + @idempotent_function(data_keyword_argument="payment", persistence_store=persistence_layer, config=config) + def collect_payment(payment: Payment): + return payment.transaction_id + + # WHEN + payment = Payment(**mock_event) + result = collect_payment(payment=payment) + + # THEN idempotency key assertion happens at MockPersistenceLayer + assert result == payment.transaction_id + + +@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") +def test_idempotent_function_pydantic_with_jmespath(): + # GIVEN + config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) + mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} + persistence_layer = MockPersistenceLayer( + expected_idempotency_key="test-func.collect_payment#" + + hashlib.md5(serialize(mock_event["transaction_id"]).encode()).hexdigest() + ) + + class Payment(BaseModel): + customer_id: str + transaction_id: str + + @idempotent_function(data_keyword_argument="payment", persistence_store=persistence_layer, config=config) + def collect_payment(payment: Payment): + return payment.transaction_id + + # WHEN + payment = Payment(**mock_event) + result = collect_payment(payment=payment) + + # THEN idempotency key assertion happens at MockPersistenceLayer + assert result == payment.transaction_id From 
0c8a2d460f1179eb321f9c20f8960f9a6dfa1ccc Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 22 Dec 2021 11:26:30 +0100 Subject: [PATCH 10/12] chore: move serialization/hashing to test util --- tests/functional/idempotency/conftest.py | 18 ++---- .../idempotency/test_idempotency.py | 62 +++++++------------ tests/functional/utils.py | 12 ++++ 3 files changed, 40 insertions(+), 52 deletions(-) diff --git a/tests/functional/idempotency/conftest.py b/tests/functional/idempotency/conftest.py index 0f74d503b88..a6bcf072a82 100644 --- a/tests/functional/idempotency/conftest.py +++ b/tests/functional/idempotency/conftest.py @@ -1,5 +1,4 @@ import datetime -import hashlib import json from collections import namedtuple from decimal import Decimal @@ -11,20 +10,15 @@ from botocore.config import Config from jmespath import functions -from aws_lambda_powertools.shared.json_encoder import Encoder from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer from aws_lambda_powertools.utilities.idempotency.idempotency import IdempotencyConfig from aws_lambda_powertools.utilities.jmespath_utils import extract_data_from_envelope from aws_lambda_powertools.utilities.validation import envelopes -from tests.functional.utils import load_event +from tests.functional.utils import hash_idempotency_key, json_serialize, load_event TABLE_NAME = "TEST_TABLE" -def serialize(data): - return json.dumps(data, sort_keys=True, cls=Encoder) - - @pytest.fixture(scope="module") def config() -> Config: return Config(region_name="us-east-1") @@ -66,12 +60,12 @@ def lambda_response(): @pytest.fixture(scope="module") def serialized_lambda_response(lambda_response): - return serialize(lambda_response) + return json_serialize(lambda_response) @pytest.fixture(scope="module") def deserialized_lambda_response(lambda_response): - return json.loads(serialize(lambda_response)) + return json.loads(json_serialize(lambda_response)) @pytest.fixture @@ -150,7 +144,7 @@ def 
expected_params_put_item_with_validation(hashed_idempotency_key, hashed_vali def hashed_idempotency_key(lambda_apigw_event, default_jmespath, lambda_context): compiled_jmespath = jmespath.compile(default_jmespath) data = compiled_jmespath.search(lambda_apigw_event) - return "test-func.lambda_handler#" + hashlib.md5(serialize(data).encode()).hexdigest() + return "test-func.lambda_handler#" + hash_idempotency_key(data) @pytest.fixture @@ -158,12 +152,12 @@ def hashed_idempotency_key_with_envelope(lambda_apigw_event): event = extract_data_from_envelope( data=lambda_apigw_event, envelope=envelopes.API_GATEWAY_HTTP, jmespath_options={} ) - return "test-func.lambda_handler#" + hashlib.md5(serialize(event).encode()).hexdigest() + return "test-func.lambda_handler#" + hash_idempotency_key(event) @pytest.fixture def hashed_validation_key(lambda_apigw_event): - return hashlib.md5(serialize(lambda_apigw_event["requestContext"]).encode()).hexdigest() + return hash_idempotency_key(lambda_apigw_event["requestContext"]) @pytest.fixture diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index 34d1ffdd296..0ed2cfcfb59 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -1,6 +1,4 @@ import copy -import hashlib -import json import sys from hashlib import md5 from unittest.mock import MagicMock @@ -24,8 +22,7 @@ from aws_lambda_powertools.utilities.idempotency.idempotency import idempotent, idempotent_function from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord from aws_lambda_powertools.utilities.validation import envelopes, validator -from tests.functional.idempotency.conftest import serialize -from tests.functional.utils import load_event +from tests.functional.utils import hash_idempotency_key, json_serialize, load_event TABLE_NAME = "TEST_TABLE" @@ -753,7 +750,7 @@ def 
test_default_no_raise_on_missing_idempotency_key( hashed_key = persistence_store._get_hashed_idempotency_key({}) # THEN return the hash of None - expected_value = f"test-func.{function_name}#" + md5(serialize(None).encode()).hexdigest() + expected_value = f"test-func.{function_name}#" + md5(json_serialize(None).encode()).hexdigest() assert expected_value == hashed_key @@ -797,7 +794,7 @@ def test_jmespath_with_powertools_json( expected_value = [sub_attr_value, static_pk_value] api_gateway_proxy_event = { "requestContext": {"authorizer": {"claims": {"sub": sub_attr_value}}}, - "body": serialize({"id": static_pk_value}), + "body": json_serialize({"id": static_pk_value}), } # WHEN calling _get_hashed_idempotency_key @@ -881,9 +878,7 @@ def _delete_record(self, data_record: DataRecord) -> None: def test_idempotent_lambda_event_source(lambda_context): # Scenario to validate that we can use the event_source decorator before or after the idempotent decorator mock_event = load_event("apiGatewayProxyV2Event.json") - persistence_layer = MockPersistenceLayer( - "test-func.lambda_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer("test-func.lambda_handler#" + hash_idempotency_key(mock_event)) expected_result = {"message": "Foo"} # GIVEN an event_source decorator @@ -903,9 +898,8 @@ def lambda_handler(event, _): def test_idempotent_function(): # Scenario to validate we can use idempotent_function with any function mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") @@ -922,9 +916,8 @@ def 
test_idempotent_function_arbitrary_args_kwargs(): # Scenario to validate we can use idempotent_function with a function # with an arbitrary number of args and kwargs mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") @@ -939,9 +932,8 @@ def record_handler(arg_one, arg_two, record, is_record): def test_idempotent_function_invalid_data_kwarg(): mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} keyword_argument = "payload" @@ -958,9 +950,8 @@ def record_handler(record): def test_idempotent_function_arg_instead_of_kwarg(): mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} keyword_argument = "record" @@ -978,18 +969,15 @@ def record_handler(record): def test_idempotent_function_and_lambda_handler(lambda_context): # Scenario to validate we can use both idempotent_function and idempotent decorators mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + 
hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") def record_handler(record): return expected_result - persistence_layer = MockPersistenceLayer( - "test-func.lambda_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer("test-func.lambda_handler#" + hash_idempotency_key(mock_event)) @idempotent(persistence_store=persistence_layer) def lambda_handler(event, _): @@ -1010,18 +998,16 @@ def test_idempotent_data_sorting(): # Scenario to validate same data in different order hashes to the same idempotency key data_one = {"data": "test message 1", "more_data": "more data 1"} data_two = {"more_data": "more data 1", "data": "test message 1"} - + idempotency_key = "test-func.dummy#" + hash_idempotency_key(data_one) # Assertion will happen in MockPersistenceLayer - persistence_layer = MockPersistenceLayer( - "test-func.dummy#" + hashlib.md5(json.dumps(data_one).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) # GIVEN @idempotent_function(data_keyword_argument="payload", persistence_store=persistence_layer) def dummy(payload): return {"message": "hello"} - # WHEN + # WHEN/THEN assertion will happen at MockPersistenceLayer dummy(payload=data_two) @@ -1120,10 +1106,8 @@ def test_idempotent_function_dataclass_with_jmespath(): dataclasses = get_dataclasses_lib() config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} - persistence_layer = MockPersistenceLayer( - expected_idempotency_key="test-func.collect_payment#" - + 
hashlib.md5(serialize(mock_event["transaction_id"]).encode()).hexdigest() - ) + idempotency_key = "test-func.collect_payment#" + hash_idempotency_key(mock_event["transaction_id"]) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) @dataclasses.dataclass class Payment: @@ -1147,10 +1131,8 @@ def test_idempotent_function_pydantic_with_jmespath(): # GIVEN config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} - persistence_layer = MockPersistenceLayer( - expected_idempotency_key="test-func.collect_payment#" - + hashlib.md5(serialize(mock_event["transaction_id"]).encode()).hexdigest() - ) + idempotency_key = "test-func.collect_payment#" + hash_idempotency_key(mock_event["transaction_id"]) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) class Payment(BaseModel): customer_id: str diff --git a/tests/functional/utils.py b/tests/functional/utils.py index 703f21744e2..5f1f21afc51 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -1,8 +1,11 @@ import base64 +import hashlib import json from pathlib import Path from typing import Any +from aws_lambda_powertools.shared.json_encoder import Encoder + def load_event(file_name: str) -> Any: path = Path(str(Path(__file__).parent.parent) + "/events/" + file_name) @@ -15,3 +18,12 @@ def str_to_b64(data: str) -> str: def b64_to_str(data: str) -> str: return base64.b64decode(data.encode()).decode("utf-8") + + +def json_serialize(data): + return json.dumps(data, sort_keys=True, cls=Encoder) + + +def hash_idempotency_key(data: Any): + """Serialize data to JSON, encode, and hash it for idempotency key""" + return hashlib.md5(json_serialize(data).encode()).hexdigest() From 676befd335fd637bc1eaa08d7fc95db5b7e0aaad Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 22 Dec 2021 12:37:14 +0100 Subject: [PATCH 11/12] docs(idempotency): document new 
formats --- docs/utilities/idempotency.md | 112 ++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 17 deletions(-) diff --git a/docs/utilities/idempotency.md b/docs/utilities/idempotency.md index 18a99b53999..5279d260d69 100644 --- a/docs/utilities/idempotency.md +++ b/docs/utilities/idempotency.md @@ -124,45 +124,50 @@ You can quickly start by initializing the `DynamoDBPersistenceLayer` class and u Similar to [idempotent decorator](#idempotent-decorator), you can use `idempotent_function` decorator for any synchronous Python function. -When using `idempotent_function`, you must tell us which keyword parameter in your function signature has the data we should use via **`data_keyword_argument`** - Such data must be JSON serializable. +When using `idempotent_function`, you must tell us which keyword parameter in your function signature has the data we should use via **`data_keyword_argument`**. + +!!! info "We support JSON serializable data, [Python Dataclasses](https://docs.python.org/3.7/library/dataclasses.html){target="_blank"}, [Parser/Pydantic Models](parser.md){target="_blank"}, and our [Event Source Data Classes](./data_classes.md){target="_blank"}." !!! warning "Make sure to call your decorated function using keyword arguments" -=== "app.py" +=== "batch_sample.py" This example also demonstrates how you can integrate with [Batch utility](batch.md), so you can process each record in an idempotent manner. 
- ```python hl_lines="4 13 18 25" - import uuid - - from aws_lambda_powertools.utilities.batch import sqs_batch_processor - from aws_lambda_powertools.utilities.idempotency import idempotent_function, DynamoDBPersistenceLayer, IdempotencyConfig + ```python hl_lines="4-5 16 21 29" + from aws_lambda_powertools.utilities.batch import (BatchProcessor, EventType, + batch_processor) + from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + processor = BatchProcessor(event_type=EventType.SQS) dynamodb = DynamoDBPersistenceLayer(table_name="idem") config = IdempotencyConfig( - event_key_jmespath="messageId", # see "Choosing a payload subset for idempotency" section + event_key_jmespath="messageId", # see Choosing a payload subset section use_local_cache=True, ) - @idempotent_function(data_keyword_argument="data", config=config, persistence_store=dynamodb) - def dummy(arg_one, arg_two, data: dict, **kwargs): - return {"data": data} - @idempotent_function(data_keyword_argument="record", config=config, persistence_store=dynamodb) - def record_handler(record): + def record_handler(record: SQSRecord): return {"message": record["body"]} - @sqs_batch_processor(record_handler=record_handler) + @idempotent_function(data_keyword_argument="data", config=config, persistence_store=dynamodb) + def dummy(arg_one, arg_two, data: dict, **kwargs): + return {"data": data} + + + @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context): # `data` parameter must be called as a keyword argument to work dummy("hello", "universe", data="test") - return {"statusCode": 200} + return processor.response() ``` -=== "Example event" +=== "Batch event" ```json hl_lines="4" { @@ -193,6 +198,79 @@ When using `idempotent_function`, you must tell us which keyword parameter in yo } ``` +=== "dataclass_sample.py" + + 
```python hl_lines="3-4 23 32" + from dataclasses import dataclass + + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + + dynamodb = DynamoDBPersistenceLayer(table_name="idem") + config = IdempotencyConfig( + event_key_jmespath="messageId", # see Choosing a payload subset section + use_local_cache=True, + ) + + @dataclass + class OrderItem: + sku: str + description: str + + @dataclass + class Order: + item: OrderItem + order_id: int + + + @idempotent_function(data_keyword_argument="order", config=config, persistence_store=dynamodb) + def process_order(order: Order): + return f"processed order {order.order_id}" + + + order_item = OrderItem(sku="fake", description="sample") + order = Order(item=order_item, order_id=12345) + + # `order` parameter must be called as a keyword argument to work + process_order(order=order) + ``` + +=== "parser_pydantic_sample.py" + + ```python hl_lines="1-2 22 31" + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + from aws_lambda_powertools.utilities.parser import BaseModel + + dynamodb = DynamoDBPersistenceLayer(table_name="idem") + config = IdempotencyConfig( + event_key_jmespath="messageId", # see Choosing a payload subset section + use_local_cache=True, + ) + + + class OrderItem(BaseModel): + sku: str + description: str + + + class Order(BaseModel): + item: OrderItem + order_id: int + + + @idempotent_function(data_keyword_argument="order", config=config, persistence_store=dynamodb) + def process_order(order: Order): + return f"processed order {order.order_id}" + + + order_item = OrderItem(sku="fake", description="sample") + order = Order(item=order_item, order_id=12345) + + # `order` parameter must be called as a keyword argument to work + process_order(order=order) + ``` + ### Choosing a payload subset for idempotency !!!
tip "Dealing with always changing payloads" @@ -209,7 +287,7 @@ Imagine the function executes successfully, but the client never receives the re !!! warning "Idempotency for JSON payloads" The payload extracted by the `event_key_jmespath` is treated as a string by default, so will be sensitive to differences in whitespace even when the JSON payload itself is identical. - To alter this behaviour, we can use the [JMESPath built-in function](jmespath_functions.md#powertools_json-function) `powertools_json()` to treat the payload as a JSON object rather than a string. + To alter this behaviour, we can use the [JMESPath built-in function](jmespath_functions.md#powertools_json-function) `powertools_json()` to treat the payload as a JSON object (dict) rather than a string. === "payment.py" From 4f08bab9c5b4a7fec144fe9d46c79926f7ace5d9 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 22 Dec 2021 13:22:40 +0100 Subject: [PATCH 12/12] docs: fix jmespath sample using correct field --- docs/utilities/idempotency.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/utilities/idempotency.md b/docs/utilities/idempotency.md index 5279d260d69..1f4260a24b7 100644 --- a/docs/utilities/idempotency.md +++ b/docs/utilities/idempotency.md @@ -208,7 +208,7 @@ When using `idempotent_function`, you must tell us which keyword parameter in yo dynamodb = DynamoDBPersistenceLayer(table_name="idem") config = IdempotencyConfig( - event_key_jmespath="messageId", # see Choosing a payload subset section + event_key_jmespath="order_id", # see Choosing a payload subset section use_local_cache=True, ) @@ -244,7 +244,7 @@ When using `idempotent_function`, you must tell us which keyword parameter in yo dynamodb = DynamoDBPersistenceLayer(table_name="idem") config = IdempotencyConfig( - event_key_jmespath="messageId", # see Choosing a payload subset section + event_key_jmespath="order_id", # see Choosing a payload subset section use_local_cache=True, )