Skip to content

Commit 0778911

Browse files
Merge branch 'release-0.11.0'
* release-0.11.0:
  Bumping version to 0.11.0
  Merge customizations for S3
2 parents 0323658 + aef3dfa commit 0778911

File tree

11 files changed

+268
-54
lines changed

11 files changed

+268
-54
lines changed

.changes/0.11.0.json

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
[
2+
{
3+
"category": "manager",
4+
"description": "Use CRC32 by default and support user provided full-object checksums.",
5+
"type": "feature"
6+
}
7+
]

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,12 @@
22
CHANGELOG
33
=========
44

5+
0.11.0
6+
======
7+
8+
* feature:manager: Use CRC32 by default and support user provided full-object checksums.
9+
10+
511
0.10.4
612
======
713

s3transfer/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ def __call__(self, bytes_amount):
145145
from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError
146146

147147
__author__ = 'Amazon Web Services'
148-
__version__ = '0.10.4'
148+
__version__ = '0.11.0'
149149

150150

151151
class NullHandler(logging.Handler):
@@ -717,13 +717,23 @@ class S3Transfer:
717717

718718
def __init__(self, client, config=None, osutil=None):
719719
self._client = client
720+
self._client.meta.events.register(
721+
'before-call.s3.*', self._update_checksum_context
722+
)
720723
if config is None:
721724
config = TransferConfig()
722725
self._config = config
723726
if osutil is None:
724727
osutil = OSUtils()
725728
self._osutil = osutil
726729

730+
def _update_checksum_context(self, params, **kwargs):
731+
request_context = params.get("context", {})
732+
checksum_context = request_context.get("checksum", {})
733+
if "request_algorithm" in checksum_context:
734+
# Force request checksum algorithm in the header if specified.
735+
checksum_context["request_algorithm"]["in"] = "header"
736+
727737
def upload_file(
728738
self, filename, bucket, key, callback=None, extra_args=None
729739
):

s3transfer/constants.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,5 +26,13 @@
2626
'ExpectedBucketOwner',
2727
]
2828

29+
FULL_OBJECT_CHECKSUM_ARGS = [
30+
'ChecksumCRC32',
31+
'ChecksumCRC32C',
32+
'ChecksumCRC64NVME',
33+
'ChecksumSHA1',
34+
'ChecksumSHA256',
35+
]
36+
2937
USER_AGENT = f's3transfer/{s3transfer.__version__}'
3038
PROCESS_USER_AGENT = f'{USER_AGENT} processpool'

s3transfer/crt.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
from botocore.exceptions import NoCredentialsError
4040
from botocore.utils import ArnParser, InvalidArnException
4141

42-
from s3transfer.constants import MB
42+
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS, MB
4343
from s3transfer.exceptions import TransferNotDoneError
4444
from s3transfer.futures import BaseTransferFuture, BaseTransferMeta
4545
from s3transfer.manager import TransferManager
@@ -491,6 +491,9 @@ def __init__(self, session, client_kwargs=None):
491491
self._client.meta.events.register(
492492
'before-send.s3.*', self._make_fake_http_response
493493
)
494+
self._client.meta.events.register(
495+
'before-call.s3.*', self._remove_checksum_context
496+
)
494497

495498
def _resolve_client_config(self, session, client_kwargs):
496499
user_provided_config = None
@@ -620,6 +623,11 @@ def _translate_crt_s3_response_error(self, s3_response_error):
620623
error_class = self._client.exceptions.from_code(error_code)
621624
return error_class(parsed_response, operation_name=operation_name)
622625

626+
def _remove_checksum_context(self, params, **kwargs):
627+
request_context = params.get("context", {})
628+
if "checksum" in request_context:
629+
del request_context["checksum"]
630+
623631

624632
class FakeRawResponse(BytesIO):
625633
def stream(self, amt=1024, decode_content=None):
@@ -786,13 +794,18 @@ def _get_make_request_args_put_object(
786794
else:
787795
call_args.extra_args["Body"] = call_args.fileobj
788796

789-
checksum_algorithm = call_args.extra_args.pop(
790-
'ChecksumAlgorithm', 'CRC32'
791-
).upper()
792-
checksum_config = awscrt.s3.S3ChecksumConfig(
793-
algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm],
794-
location=awscrt.s3.S3ChecksumLocation.TRAILER,
795-
)
797+
checksum_config = None
798+
if not any(
799+
checksum_arg in call_args.extra_args
800+
for checksum_arg in FULL_OBJECT_CHECKSUM_ARGS
801+
):
802+
checksum_algorithm = call_args.extra_args.pop(
803+
'ChecksumAlgorithm', 'CRC32'
804+
).upper()
805+
checksum_config = awscrt.s3.S3ChecksumConfig(
806+
algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm],
807+
location=awscrt.s3.S3ChecksumLocation.TRAILER,
808+
)
796809
# Suppress botocore's automatic MD5 calculation by setting an override
797810
# value that will get deleted in the BotocoreCRTRequestSerializer.
798811
# As part of the CRT S3 request, we request the CRT S3 client to

s3transfer/manager.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,12 @@
1616
import threading
1717

1818
from s3transfer.bandwidth import BandwidthLimiter, LeakyBucket
19-
from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS, KB, MB
19+
from s3transfer.constants import (
20+
ALLOWED_DOWNLOAD_ARGS,
21+
FULL_OBJECT_CHECKSUM_ARGS,
22+
KB,
23+
MB,
24+
)
2025
from s3transfer.copies import CopySubmissionTask
2126
from s3transfer.delete import DeleteSubmissionTask
2227
from s3transfer.download import DownloadSubmissionTask
@@ -35,8 +40,8 @@
3540
OSUtils,
3641
SlidingWindowSemaphore,
3742
TaskSemaphore,
38-
add_s3express_defaults,
3943
get_callbacks,
44+
set_default_checksum_algorithm,
4045
signal_not_transferring,
4146
signal_transferring,
4247
)
@@ -157,7 +162,7 @@ def _validate_attrs_are_nonzero(self):
157162
class TransferManager:
158163
ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS
159164

160-
ALLOWED_UPLOAD_ARGS = [
165+
_ALLOWED_SHARED_ARGS = [
161166
'ACL',
162167
'CacheControl',
163168
'ChecksumAlgorithm',
@@ -187,7 +192,16 @@ class TransferManager:
187192
'WebsiteRedirectLocation',
188193
]
189194

190-
ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
195+
ALLOWED_UPLOAD_ARGS = (
196+
_ALLOWED_SHARED_ARGS
197+
+ [
198+
'ChecksumType',
199+
'MpuObjectSize',
200+
]
201+
+ FULL_OBJECT_CHECKSUM_ARGS
202+
)
203+
204+
ALLOWED_COPY_ARGS = _ALLOWED_SHARED_ARGS + [
191205
'CopySourceIfMatch',
192206
'CopySourceIfModifiedSince',
193207
'CopySourceIfNoneMatch',
@@ -315,13 +329,13 @@ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
315329
:rtype: s3transfer.futures.TransferFuture
316330
:returns: Transfer future representing the upload
317331
"""
318-
if extra_args is None:
319-
extra_args = {}
332+
333+
extra_args = extra_args.copy() if extra_args else {}
320334
if subscribers is None:
321335
subscribers = []
322336
self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
323337
self._validate_if_bucket_supported(bucket)
324-
self._add_operation_defaults(bucket, extra_args)
338+
self._add_operation_defaults(extra_args)
325339
call_args = CallArgs(
326340
fileobj=fileobj,
327341
bucket=bucket,
@@ -504,8 +518,8 @@ def _validate_all_known_args(self, actual, allowed):
504518
"must be one of: {}".format(kwarg, ', '.join(allowed))
505519
)
506520

507-
def _add_operation_defaults(self, bucket, extra_args):
508-
add_s3express_defaults(bucket, extra_args)
521+
def _add_operation_defaults(self, extra_args):
522+
set_default_checksum_algorithm(extra_args)
509523

510524
def _submit_transfer(
511525
self, call_args, submission_task_cls, extra_main_kwargs=None

s3transfer/upload.py

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from io import BytesIO
1515

1616
from s3transfer.compat import readable, seekable
17+
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS
1718
from s3transfer.futures import IN_MEMORY_UPLOAD_TAG
1819
from s3transfer.tasks import (
1920
CompleteMultipartUploadTask,
@@ -512,6 +513,10 @@ def _wrap_data(self, data, callbacks, close_callbacks):
512513
class UploadSubmissionTask(SubmissionTask):
513514
"""Task for submitting tasks to execute an upload"""
514515

516+
PUT_OBJECT_BLOCKLIST = ["ChecksumType", "MpuObjectSize"]
517+
518+
CREATE_MULTIPART_BLOCKLIST = FULL_OBJECT_CHECKSUM_ARGS + ["MpuObjectSize"]
519+
515520
UPLOAD_PART_ARGS = [
516521
'ChecksumAlgorithm',
517522
'SSECustomerKey',
@@ -527,7 +532,9 @@ class UploadSubmissionTask(SubmissionTask):
527532
'SSECustomerKeyMD5',
528533
'RequestPayer',
529534
'ExpectedBucketOwner',
530-
]
535+
'ChecksumType',
536+
'MpuObjectSize',
537+
] + FULL_OBJECT_CHECKSUM_ARGS
531538

532539
def _get_upload_input_manager_cls(self, transfer_future):
533540
"""Retrieves a class for managing input for an upload based on file type
@@ -621,6 +628,10 @@ def _submit_upload_request(
621628
):
622629
call_args = transfer_future.meta.call_args
623630

631+
put_object_extra_args = self._extra_put_object_args(
632+
call_args.extra_args
633+
)
634+
624635
# Get any tags that need to be associated to the put object task
625636
put_object_tag = self._get_upload_task_tag(
626637
upload_input_manager, 'put_object'
@@ -638,7 +649,7 @@ def _submit_upload_request(
638649
),
639650
'bucket': call_args.bucket,
640651
'key': call_args.key,
641-
'extra_args': call_args.extra_args,
652+
'extra_args': put_object_extra_args,
642653
},
643654
is_final=True,
644655
),
@@ -656,6 +667,19 @@ def _submit_multipart_request(
656667
):
657668
call_args = transfer_future.meta.call_args
658669

670+
# When a user provided checksum is passed, set "ChecksumType" to "FULL_OBJECT"
671+
# and "ChecksumAlgorithm" to the related algorithm.
672+
for checksum in FULL_OBJECT_CHECKSUM_ARGS:
673+
if checksum in call_args.extra_args:
674+
call_args.extra_args["ChecksumType"] = "FULL_OBJECT"
675+
call_args.extra_args["ChecksumAlgorithm"] = checksum.replace(
676+
"Checksum", ""
677+
)
678+
679+
create_multipart_extra_args = self._extra_create_multipart_args(
680+
call_args.extra_args
681+
)
682+
659683
# Submit the request to create a multipart upload.
660684
create_multipart_future = self._transfer_coordinator.submit(
661685
request_executor,
@@ -665,7 +689,7 @@ def _submit_multipart_request(
665689
'client': client,
666690
'bucket': call_args.bucket,
667691
'key': call_args.key,
668-
'extra_args': call_args.extra_args,
692+
'extra_args': create_multipart_extra_args,
669693
},
670694
),
671695
)
@@ -739,6 +763,16 @@ def _extra_upload_part_args(self, extra_args):
739763
def _extra_complete_multipart_args(self, extra_args):
740764
return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS)
741765

766+
def _extra_create_multipart_args(self, extra_args):
767+
return get_filtered_dict(
768+
extra_args, blocklisted_keys=self.CREATE_MULTIPART_BLOCKLIST
769+
)
770+
771+
def _extra_put_object_args(self, extra_args):
772+
return get_filtered_dict(
773+
extra_args, blocklisted_keys=self.PUT_OBJECT_BLOCKLIST
774+
)
775+
742776
def _get_upload_task_tag(self, upload_input_manager, operation_name):
743777
tag = None
744778
if upload_input_manager.stores_body_in_memory(operation_name):

s3transfer/utils.py

Lines changed: 24 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,11 @@
2626
ReadTimeoutError,
2727
ResponseStreamingError,
2828
)
29-
from botocore.httpchecksum import AwsChunkedWrapper
29+
from botocore.httpchecksum import DEFAULT_CHECKSUM_ALGORITHM, AwsChunkedWrapper
3030
from botocore.utils import is_s3express_bucket
3131

3232
from s3transfer.compat import SOCKET_ERROR, fallocate, rename_file
33+
from s3transfer.constants import FULL_OBJECT_CHECKSUM_ARGS
3334

3435
MAX_PARTS = 10000
3536
# The maximum file size you can upload via S3 per request.
@@ -148,20 +149,27 @@ def invoke_progress_callbacks(callbacks, bytes_transferred):
148149
callback(bytes_transferred=bytes_transferred)
149150

150151

151-
def get_filtered_dict(original_dict, whitelisted_keys):
152-
"""Gets a dictionary filtered by whitelisted keys
152+
def get_filtered_dict(
153+
original_dict, whitelisted_keys=None, blocklisted_keys=None
154+
):
155+
"""Gets a dictionary filtered by whitelisted and blocklisted keys.
153156
154157
:param original_dict: The original dictionary of arguments to source keys
155158
and values.
156159
:param whitelisted_key: A list of keys to include in the filtered
157160
dictionary.
161+
:param blocklisted_key: A list of keys to exclude in the filtered
162+
dictionary.
158163
159164
:returns: A dictionary containing key/values from the original dictionary
160-
whose key was included in the whitelist
165+
whose key was included in the whitelist and/or not included in the
166+
blocklist.
161167
"""
162168
filtered_dict = {}
163169
for key, value in original_dict.items():
164-
if key in whitelisted_keys:
170+
if (whitelisted_keys and key in whitelisted_keys) or (
171+
blocklisted_keys and key not in blocklisted_keys
172+
):
165173
filtered_dict[key] = value
166174
return filtered_dict
167175

@@ -809,6 +817,17 @@ def _adjust_for_max_parts(self, current_chunksize, file_size):
809817

810818

811819
def add_s3express_defaults(bucket, extra_args):
820+
"""
821+
This function has been deprecated, but is kept for backwards compatibility.
822+
This function is subject to removal in a future release.
823+
"""
812824
if is_s3express_bucket(bucket) and "ChecksumAlgorithm" not in extra_args:
813825
# Default Transfer Operations to S3Express to use CRC32
814826
extra_args["ChecksumAlgorithm"] = "crc32"
827+
828+
829+
def set_default_checksum_algorithm(extra_args):
830+
"""Set the default algorithm to CRC32 if not specified by the user."""
831+
if any(checksum in extra_args for checksum in FULL_OBJECT_CHECKSUM_ARGS):
832+
return
833+
extra_args.setdefault("ChecksumAlgorithm", DEFAULT_CHECKSUM_ALGORITHM)

0 commit comments

Comments
 (0)