Commit b804373

Author: Jim Fulton
fix: use a larger chunk size when loading data (#799)
The chunk size used for data uploads was too small (1 MB). Now it's 100 MB.
1 parent f0990f2 commit b804373
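
A rough back-of-the-envelope illustration (not code from this commit) of what the change means: a resumable upload sends roughly one HTTP request per chunk, so the chunk size directly controls how many round trips a load job needs. The numbers below are only an example payload size.

    import math

    def request_count(payload_bytes, chunk_bytes):
        """Approximate number of chunked requests needed to stream a payload."""
        return math.ceil(payload_bytes / chunk_bytes)

    two_gib = 2 * 1024 ** 3
    print(request_count(two_gib, 1024 ** 2))        # 2048 requests with 1 MB chunks
    print(request_count(two_gib, 100 * 1024 ** 2))  # 21 requests with 100 MB chunks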

File tree

2 files changed: +21 −1 lines changed


google/cloud/bigquery/client.py

+1 −1

@@ -98,7 +98,7 @@
 from google.cloud.bigquery.table import RowIterator


-_DEFAULT_CHUNKSIZE = 1048576  # 1024 * 1024 B = 1 MB
+_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
 _MAX_MULTIPART_SIZE = 5 * 1024 * 1024
 _DEFAULT_NUM_RETRIES = 6
 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
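
For context, a minimal sketch of how this constant is assumed to reach the upload layer: the client hands it to google-resumable-media's ResumableUpload as the chunk size, which is the positional argument the new test below inspects. The helper name here is hypothetical; only the constant and the call shape asserted by the test come from this commit.

    # Sketch only, not the library's exact code; _start_resumable_upload is a
    # hypothetical helper standing in for the client's internal upload path.
    from google.resumable_media.requests import ResumableUpload

    _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB, per this commit

    def _start_resumable_upload(upload_url, headers):
        # chunk_size is the second positional argument, which is what
        # test_upload_chunksize reads back via RU.call_args_list[0][0][1].
        return ResumableUpload(upload_url, _DEFAULT_CHUNKSIZE, headers=headers)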

tests/unit/test_client.py

+20
@@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self):

         client.schema_to_json(schema_list, fake_file)
         assert file_content == json.loads(fake_file.getvalue())
+
+
+def test_upload_chunksize(client):
+    with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU:
+        upload = RU.return_value
+
+        upload.finished = False
+
+        def transmit_next_chunk(transport):
+            upload.finished = True
+            result = mock.MagicMock()
+            result.json.return_value = {}
+            return result
+
+        upload.transmit_next_chunk = transmit_next_chunk
+        f = io.BytesIO()
+        client.load_table_from_file(f, "foo.bar")
+
+        chunk_size = RU.call_args_list[0][0][1]
+        assert chunk_size == 100 * (1 << 20)
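
The test patches ResumableUpload, stubs transmit_next_chunk so the mocked upload finishes after a single chunk, and then asserts that the chunk size passed as the second positional argument equals 100 * (1 << 20), i.e. 104,857,600 bytes, the same 100 * 1024 * 1024 value set in client.py.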
