Commit b804373

Author: Jim Fulton
fix: use a larger chunk size when loading data (#799)
The chunk size used for data uploads was too small (1 MB). Now it's 100 MB.
1 parent f0990f2 commit b804373
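
A rough back-of-the-envelope illustration (not code from this commit) of what the change means: a resumable upload sends roughly one HTTP request per chunk, so the chunk size directly controls how many round trips a load job needs. The numbers below are only an example payload size.

    import math

    def request_count(payload_bytes, chunk_bytes):
        """Approximate number of chunked requests needed to stream a payload."""
        return math.ceil(payload_bytes / chunk_bytes)

    two_gib = 2 * 1024 ** 3
    print(request_count(two_gib, 1024 ** 2))        # 2048 requests with 1 MB chunks
    print(request_count(two_gib, 100 * 1024 ** 2))  # 21 requests with 100 MB chunks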

File tree

2 files changed: +21 −1 lines changed


google/cloud/bigquery/client.py

+1 −1

@@ -98,7 +98,7 @@
 from google.cloud.bigquery.table import RowIterator


-_DEFAULT_CHUNKSIZE = 1048576  # 1024 * 1024 B = 1 MB
+_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
 _MAX_MULTIPART_SIZE = 5 * 1024 * 1024
 _DEFAULT_NUM_RETRIES = 6
 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
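
For context, a minimal sketch of how this constant is assumed to reach the upload layer: the client hands it to google-resumable-media's ResumableUpload as the chunk size, which is the positional argument the new test below inspects. The helper name here is hypothetical; only the constant and the call shape asserted by the test come from this commit.

    # Sketch only, not the library's exact code; _start_resumable_upload is a
    # hypothetical helper standing in for the client's internal upload path.
    from google.resumable_media.requests import ResumableUpload

    _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB, per this commit

    def _start_resumable_upload(upload_url, headers):
        # chunk_size is the second positional argument, which is what
        # test_upload_chunksize reads back via RU.call_args_list[0][0][1].
        return ResumableUpload(upload_url, _DEFAULT_CHUNKSIZE, headers=headers)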

tests/unit/test_client.py

+20
@@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self):

         client.schema_to_json(schema_list, fake_file)
         assert file_content == json.loads(fake_file.getvalue())
+
+
+def test_upload_chunksize(client):
+    with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU:
+        upload = RU.return_value
+
+        upload.finished = False
+
+        def transmit_next_chunk(transport):
+            upload.finished = True
+            result = mock.MagicMock()
+            result.json.return_value = {}
+            return result
+
+        upload.transmit_next_chunk = transmit_next_chunk
+        f = io.BytesIO()
+        client.load_table_from_file(f, "foo.bar")
+
+        chunk_size = RU.call_args_list[0][0][1]
+        assert chunk_size == 100 * (1 << 20)
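
The test patches ResumableUpload, stubs transmit_next_chunk so the mocked upload finishes after a single chunk, and then asserts that the chunk size passed as the second positional argument equals 100 * (1 << 20), i.e. 104,857,600 bytes, the same 100 * 1024 * 1024 value set in client.py.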
