Skip to content

Commit f3517bf

Browse files
committed
feat: Enable custom predicates for media operations (#1385)
1 parent 5375fa0 commit f3517bf

24 files changed

+292
-907
lines changed

README.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ Preview Release
4545

4646
Python Storage 3.0 is currently in a preview state. If you find that
4747
backwards compatibility for your application is broken with this release for any
48-
reason, please let us know through the Github issues system. Thank you.
48+
reason, please let us know through the GitHub issues system. While some breaks
49+
of backwards compatibility may be unavoidable due to new features in the major
50+
version release, we will do our best to minimize them. Thank you.
4951

5052
Exception Handling
5153
~~~~~~~~~~~~~~~~~~
@@ -88,6 +90,9 @@ Miscellaneous
8890

8991
- The BlobWriter class now attempts to terminate an ongoing resumable upload if
9092
the writer exits with an exception.
93+
- Retry behavior is now identical between media operations (uploads and
94+
downloads) and other operations, and custom predicates are now supported for
95+
media operations as well.
9196

9297
Quick Start
9398
-----------

google/cloud/storage/_helpers.py

-24
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from uuid import uuid4
2727

2828
from google.auth import environment_vars
29-
from google.cloud.storage import _media
3029
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
3130
from google.cloud.storage.retry import DEFAULT_RETRY
3231
from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED
@@ -628,29 +627,6 @@ def _bucket_bound_hostname_url(host, scheme=None):
628627
return f"{scheme}://{host}"
629628

630629

631-
def _api_core_retry_to_resumable_media_retry(retry):
632-
"""Convert google.api.core.Retry to google.cloud.storage._media.RetryStrategy.
633-
634-
Custom predicates are not translated.
635-
636-
:type retry: google.api_core.Retry
637-
:param retry: (Optional) The google.api_core.Retry object to translate.
638-
639-
:rtype: google.cloud.storage._media.RetryStrategy
640-
:returns: A RetryStrategy with all applicable attributes copied from input.
641-
"""
642-
643-
if retry is not None:
644-
return _media.RetryStrategy(
645-
max_sleep=retry._maximum,
646-
max_cumulative_retry=retry._deadline,
647-
initial_delay=retry._initial,
648-
multiplier=retry._multiplier,
649-
)
650-
else:
651-
return _media.RetryStrategy(max_retries=0)
652-
653-
654630
def _get_invocation_id():
655631
return "gccl-invocation-id/" + str(uuid4())
656632

google/cloud/storage/_media/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,9 @@
2626
.. _requests: http://docs.python-requests.org/
2727
"""
2828

29-
from google.cloud.storage._media.common import RetryStrategy
3029
from google.cloud.storage._media.common import UPLOAD_CHUNK_SIZE
3130

3231

3332
__all__ = [
34-
"RetryStrategy",
3533
"UPLOAD_CHUNK_SIZE",
3634
]

google/cloud/storage/_media/_download.py

+54-6
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
import re
2020

2121
from google.cloud.storage._media import _helpers
22-
from google.cloud.storage._media import common
2322
from google.cloud.storage.exceptions import InvalidResponse
23+
from google.cloud.storage.retry import DEFAULT_RETRY
2424

2525

2626
_CONTENT_RANGE_RE = re.compile(
@@ -45,14 +45,30 @@ class DownloadBase(object):
4545
end (int): The last byte in a range to be downloaded.
4646
headers (Optional[Mapping[str, str]]): Extra headers that should
4747
be sent with the request, e.g. headers for encrypted data.
48+
retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
49+
A None value will disable retries. A google.api_core.retry.Retry
50+
value will enable retries, and the object will configure backoff and
51+
timeout options.
52+
53+
See the retry.py source code and docstrings in this package
54+
(google.cloud.storage.retry) for information on retry types and how
55+
to configure them.
4856
4957
Attributes:
5058
media_url (str): The URL containing the media to be downloaded.
5159
start (Optional[int]): The first byte in a range to be downloaded.
5260
end (Optional[int]): The last byte in a range to be downloaded.
5361
"""
5462

55-
def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
63+
def __init__(
64+
self,
65+
media_url,
66+
stream=None,
67+
start=None,
68+
end=None,
69+
headers=None,
70+
retry=DEFAULT_RETRY,
71+
):
5672
self.media_url = media_url
5773
self._stream = stream
5874
self.start = start
@@ -61,7 +77,7 @@ def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
6177
headers = {}
6278
self._headers = headers
6379
self._finished = False
64-
self._retry_strategy = common.RetryStrategy()
80+
self._retry_strategy = retry
6581

6682
@property
6783
def finished(self):
@@ -133,6 +149,15 @@ class Download(DownloadBase):
133149
values are "md5", "crc32c", "auto" and None. The default is "auto",
134150
which will try to detect if the C extension for crc32c is installed
135151
and fall back to md5 otherwise.
152+
retry (Optional[google.api_core.retry.Retry]): How to retry the
153+
RPC. A None value will disable retries. A
154+
google.api_core.retry.Retry value will enable retries, and the
155+
object will configure backoff and timeout options.
156+
157+
See the retry.py source code and docstrings in this package
158+
(google.cloud.storage.retry) for information on retry types and how
159+
to configure them.
160+
136161
"""
137162

138163
def __init__(
@@ -143,9 +168,10 @@ def __init__(
143168
end=None,
144169
headers=None,
145170
checksum="auto",
171+
retry=DEFAULT_RETRY,
146172
):
147173
super(Download, self).__init__(
148-
media_url, stream=stream, start=start, end=end, headers=headers
174+
media_url, stream=stream, start=start, end=end, headers=headers, retry=retry
149175
)
150176
self.checksum = checksum
151177
if self.checksum == "auto":
@@ -242,6 +268,14 @@ class ChunkedDownload(DownloadBase):
242268
headers (Optional[Mapping[str, str]]): Extra headers that should
243269
be sent with each request, e.g. headers for data encryption
244270
key headers.
271+
retry (Optional[google.api_core.retry.Retry]): How to retry the
272+
RPC. A None value will disable retries. A
273+
google.api_core.retry.Retry value will enable retries, and the
274+
object will configure backoff and timeout options.
275+
276+
See the retry.py source code and docstrings in this package
277+
(google.cloud.storage.retry) for information on retry types and how
278+
to configure them.
245279
246280
Attributes:
247281
media_url (str): The URL containing the media to be downloaded.
@@ -253,13 +287,27 @@ class ChunkedDownload(DownloadBase):
253287
ValueError: If ``start`` is negative.
254288
"""
255289

256-
def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
290+
def __init__(
291+
self,
292+
media_url,
293+
chunk_size,
294+
stream,
295+
start=0,
296+
end=None,
297+
headers=None,
298+
retry=DEFAULT_RETRY,
299+
):
257300
if start < 0:
258301
raise ValueError(
259302
"On a chunked download the starting " "value cannot be negative."
260303
)
261304
super(ChunkedDownload, self).__init__(
262-
media_url, stream=stream, start=start, end=end, headers=headers
305+
media_url,
306+
stream=stream,
307+
start=start,
308+
end=end,
309+
headers=headers,
310+
retry=retry,
263311
)
264312
self.chunk_size = chunk_size
265313
self._bytes_downloaded = 0

google/cloud/storage/_media/_helpers.py

+2-31
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@
1919
import base64
2020
import hashlib
2121
import logging
22-
import random
2322

2423
from urllib.parse import parse_qs
2524
from urllib.parse import urlencode
2625
from urllib.parse import urlsplit
2726
from urllib.parse import urlunsplit
2827

29-
from google.cloud.storage._media import common
28+
from google.cloud.storage import retry
3029
from google.cloud.storage.exceptions import InvalidResponse
3130

3231

@@ -101,7 +100,7 @@ def require_status_code(response, status_codes, get_status_code, callback=do_not
101100
"""
102101
status_code = get_status_code(response)
103102
if status_code not in status_codes:
104-
if status_code not in common.RETRYABLE:
103+
if status_code not in retry._RETRYABLE_STATUS_CODES:
105104
callback()
106105
raise InvalidResponse(
107106
response,
@@ -113,34 +112,6 @@ def require_status_code(response, status_codes, get_status_code, callback=do_not
113112
return status_code
114113

115114

116-
def calculate_retry_wait(base_wait, max_sleep, multiplier=2.0):
117-
"""Calculate the amount of time to wait before a retry attempt.
118-
119-
Wait time grows exponentially with the number of attempts, until
120-
``max_sleep``.
121-
122-
A random amount of jitter (between 0 and 1 seconds) is added to spread out
123-
retry attempts from different clients.
124-
125-
Args:
126-
base_wait (float): The "base" wait time (i.e. without any jitter)
127-
that will be multiplied until it reaches the maximum sleep.
128-
max_sleep (float): Maximum value that a sleep time is allowed to be.
129-
multiplier (float): Multiplier to apply to the base wait.
130-
131-
Returns:
132-
Tuple[float, float]: The new base wait time as well as the wait time
133-
to be applied (with a random amount of jitter between 0 and 1 seconds
134-
added).
135-
"""
136-
new_base_wait = multiplier * base_wait
137-
if new_base_wait > max_sleep:
138-
new_base_wait = max_sleep
139-
140-
jitter_ms = random.randint(0, 1000)
141-
return new_base_wait, new_base_wait + 0.001 * jitter_ms
142-
143-
144115
def _get_metadata_key(checksum_type):
145116
if checksum_type == "md5":
146117
return "md5Hash"

0 commit comments

Comments
 (0)