Skip to content

Commit 37ab5ba

Browse files
Weyaxi and Wauplin authored
Handle Rate Limits in Pagination with Automatic Retries (#2970)
* rate limit handling
* Import http_backoff (missed it earlier, mb)
* fix tests

---------

Co-authored-by: Lucain <[email protected]>
Co-authored-by: Lucain Pouget <[email protected]>
1 parent 2cef17b commit 37ab5ba

File tree

2 files changed

+19
-15
lines changed

2 files changed

+19
-15
lines changed

src/huggingface_hub/utils/_pagination.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import requests
2020

21-
from . import get_session, hf_raise_for_status, logging
21+
from . import get_session, hf_raise_for_status, http_backoff, logging
2222

2323

2424
logger = logging.get_logger(__name__)
@@ -42,7 +42,7 @@ def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
4242
next_page = _get_next_page(r)
4343
while next_page is not None:
4444
logger.debug(f"Pagination detected. Requesting next page: {next_page}")
45-
r = session.get(next_page, headers=headers)
45+
r = http_backoff("GET", next_page, max_retries=20, retry_on_status_codes=429, headers=headers)
4646
hf_raise_for_status(r)
4747
yield from r.json()
4848
next_page = _get_next_page(r)

tests/test_utils_pagination.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88

99
class TestPagination(unittest.TestCase):
1010
@patch("huggingface_hub.utils._pagination.get_session")
11+
@patch("huggingface_hub.utils._pagination.http_backoff")
1112
@patch("huggingface_hub.utils._pagination.hf_raise_for_status")
1213
@handle_injection_in_test
13-
def test_mocked_paginate(self, mock_get_session: Mock, mock_hf_raise_for_status: Mock) -> None:
14+
def test_mocked_paginate(
15+
self, mock_get_session: Mock, mock_http_backoff: Mock, mock_hf_raise_for_status: Mock
16+
) -> None:
1417
mock_get = mock_get_session().get
1518
mock_params = Mock()
1619
mock_headers = Mock()
@@ -33,31 +36,32 @@ def test_mocked_paginate(self, mock_get_session: Mock, mock_hf_raise_for_status:
3336
# Mock response
3437
mock_get.side_effect = [
3538
mock_response_page_1,
39+
]
40+
mock_http_backoff.side_effect = [
3641
mock_response_page_2,
3742
mock_response_page_3,
3843
]
3944

4045
results = paginate("url", params=mock_params, headers=mock_headers)
4146

4247
# Requests are made only when generator is yielded
43-
self.assertEqual(mock_get.call_count, 0)
48+
assert mock_get.call_count == 0
4449

4550
# Results after concatenating pages
46-
self.assertListEqual(list(results), [1, 2, 3, 4, 5, 6, 7, 8])
51+
assert list(results) == [1, 2, 3, 4, 5, 6, 7, 8]
4752

4853
# All pages requested: 3 requests, 3 raise for status
49-
self.assertEqual(mock_get.call_count, 3)
50-
self.assertEqual(mock_hf_raise_for_status.call_count, 3)
54+
# First request with `get_session.get` (we want at least 1 request to succeed correctly) and 2 with `http_backoff`
55+
assert mock_get.call_count == 1
56+
assert mock_http_backoff.call_count == 2
57+
assert mock_hf_raise_for_status.call_count == 3
5158

5259
# Params not passed to next pages
53-
self.assertListEqual(
54-
mock_get.call_args_list,
55-
[
56-
call("url", params=mock_params, headers=mock_headers),
57-
call("url_p2", headers=mock_headers),
58-
call("url_p3", headers=mock_headers),
59-
],
60-
)
60+
assert mock_get.call_args_list == [call("url", params=mock_params, headers=mock_headers)]
61+
assert mock_http_backoff.call_args_list == [
62+
call("GET", "url_p2", max_retries=20, retry_on_status_codes=429, headers=mock_headers),
63+
call("GET", "url_p3", max_retries=20, retry_on_status_codes=429, headers=mock_headers),
64+
]
6165

6266
def test_paginate_github_api(self) -> None:
6367
# Real test: paginate over huggingface repos on Github

0 commit comments

Comments
 (0)