@@ -7307,6 +7307,134 @@ def test_load_table_from_dataframe_struct_fields(self):
         assert sent_config.source_format == job.SourceFormat.PARQUET
         assert sent_config.schema == schema
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_array_fields(self):
+        """Test that a DataFrame with array columns can be uploaded correctly.
+
+        See: https://github.com/googleapis/python-bigquery/issues/19
+        """
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+
+        records = [(3.14, [1, 2])]
+        dataframe = pandas.DataFrame(
+            data=records, columns=["float_column", "array_column"]
+        )
+
+        schema = [
+            SchemaField("float_column", "FLOAT"),
+            SchemaField(
+                "array_column",
+                "INTEGER",
+                mode="REPEATED",
+            ),
+        ]
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table",
+            autospec=True,
+            side_effect=google.api_core.exceptions.NotFound("Table not found"),
+        )
+
+        with load_patch as load_table_from_file, get_table_patch:
+            client.load_table_from_dataframe(
+                dataframe,
+                self.TABLE_REF,
+                job_config=job_config,
+                location=self.LOCATION,
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            size=mock.ANY,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+            timeout=DEFAULT_TIMEOUT,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert sent_config.schema == schema
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
+        """Test that a DataFrame with array columns can be uploaded correctly.
+
+        See: https://github.com/googleapis/python-bigquery/issues/19
+        """
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+
+        records = [(3.14, [1, 2])]
+        dataframe = pandas.DataFrame(
+            data=records, columns=["float_column", "array_column"]
+        )
+
+        expected_schema = [
+            SchemaField("float_column", "FLOAT"),
+            SchemaField(
+                "array_column",
+                "INT64",
+                mode="REPEATED",
+            ),
+        ]
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table",
+            autospec=True,
+            side_effect=google.api_core.exceptions.NotFound("Table not found"),
+        )
+
+        with load_patch as load_table_from_file, get_table_patch:
+            client.load_table_from_dataframe(
+                dataframe,
+                self.TABLE_REF,
+                location=self.LOCATION,
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            size=mock.ANY,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+            timeout=DEFAULT_TIMEOUT,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert sent_config.schema == expected_schema
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_partial_schema(self):
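For reference, here is a minimal sketch of the behavior these tests exercise: loading a pandas DataFrame whose list-valued column maps to a REPEATED BigQuery field. The destination table name and credentials setup are placeholders, not taken from this diff; the calls themselves (Client.load_table_from_dataframe, LoadJobConfig, SchemaField) are the same public google-cloud-bigquery APIs mocked above.

# Illustrative sketch only: "my_dataset.array_demo" is a placeholder destination table.
import pandas
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials and a default project

# One row: a scalar float and a Python list, matching the records used in the tests.
dataframe = pandas.DataFrame(
    {"float_column": [3.14], "array_column": [[1, 2]]}
)

# Explicit schema, mirroring the first test: the list column is loaded as a REPEATED INTEGER field.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("float_column", "FLOAT"),
        bigquery.SchemaField("array_column", "INTEGER", mode="REPEATED"),
    ]
)

load_job = client.load_table_from_dataframe(
    dataframe, "my_dataset.array_demo", job_config=job_config
)
load_job.result()  # block until the load job completes

Dropping job_config, as the second test does, leaves schema detection to the client, which the test expects to infer an INT64 REPEATED column for the list-valued data.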