 from .helpers import _make_job_resource
 
 
+@pytest.fixture
+def table_read_options_kwarg():
+    # Create a BigQuery Storage table read options object with pyarrow compression
+    # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is
+    # installed to support the compression.
+    if not hasattr(bigquery_storage, "ArrowSerializationOptions"):
+        return {}
+
+    read_options = bigquery_storage.ReadSession.TableReadOptions(
+        arrow_serialization_options=bigquery_storage.ArrowSerializationOptions(
+            buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
+        )
+    )
+    return {"read_options": read_options}
+
+
 @pytest.mark.parametrize(
     "query,expected",
     (
@@ -82,7 +98,7 @@ def test__contains_order_by(query, expected):
         "SelecT name, age froM table OrdeR \n \t BY other_column;",
     ),
 )
-def test_to_dataframe_bqstorage_preserve_order(query):
+def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
     from google.cloud.bigquery.job import QueryJob as target_class
 
     job_resource = _make_job_resource(
@@ -123,8 +139,10 @@ def test_to_dataframe_bqstorage_preserve_order(query):
     destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
         **job_resource["configuration"]["query"]["destinationTable"]
     )
-    expected_session = bigquery_storage.types.ReadSession(
-        table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW,
+    expected_session = bigquery_storage.ReadSession(
+        table=destination_table,
+        data_format=bigquery_storage.DataFormat.ARROW,
+        **table_read_options_kwarg,
     )
     bqstorage_client.create_read_session.assert_called_once_with(
         parent="projects/test-project",
@@ -431,7 +449,7 @@ def test_to_dataframe_ddl_query():
 @pytest.mark.skipif(
     bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
 )
-def test_to_dataframe_bqstorage():
+def test_to_dataframe_bqstorage(table_read_options_kwarg):
     from google.cloud.bigquery.job import QueryJob as target_class
 
     resource = _make_job_resource(job_type="query", ended=True)
@@ -468,8 +486,10 @@ def test_to_dataframe_bqstorage():
     destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
         **resource["configuration"]["query"]["destinationTable"]
     )
-    expected_session = bigquery_storage.types.ReadSession(
-        table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW,
+    expected_session = bigquery_storage.ReadSession(
+        table=destination_table,
+        data_format=bigquery_storage.DataFormat.ARROW,
+        **table_read_options_kwarg,
     )
     bqstorage_client.create_read_session.assert_called_once_with(
         parent=f"projects/{client.project}",
@@ -478,6 +498,52 @@ def test_to_dataframe_bqstorage():
     )
 
 
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(
+    bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_to_dataframe_bqstorage_no_pyarrow_compression():
+    from google.cloud.bigquery.job import QueryJob as target_class
+
+    resource = _make_job_resource(job_type="query", ended=True)
+    query_resource = {
+        "jobComplete": True,
+        "jobReference": resource["jobReference"],
+        "totalRows": "4",
+        "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]},
+    }
+    connection = _make_connection(query_resource)
+    client = _make_client(connection=connection)
+    job = target_class.from_api_repr(resource, client)
+    bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    session = bigquery_storage.types.ReadSession()
+    session.avro_schema.schema = json.dumps(
+        {
+            "type": "record",
+            "name": "__root__",
+            "fields": [{"name": "name", "type": ["null", "string"]}],
+        }
+    )
+    bqstorage_client.create_read_session.return_value = session
+
+    with mock.patch(
+        "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", new=False
+    ):
+        job.to_dataframe(bqstorage_client=bqstorage_client)
+
+    destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
+        **resource["configuration"]["query"]["destinationTable"]
+    )
+    expected_session = bigquery_storage.ReadSession(
+        table=destination_table, data_format=bigquery_storage.DataFormat.ARROW,
+    )
+    bqstorage_client.create_read_session.assert_called_once_with(
+        parent=f"projects/{client.project}",
+        read_session=expected_session,
+        max_stream_count=0,
+    )
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_to_dataframe_column_dtypes():
     from google.cloud.bigquery.job import QueryJob as target_class
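For orientation, here is a small self-contained sketch (not code from this PR or from the library) that combines the two pieces the diff exercises: the hasattr() capability check used by the table_read_options_kwarg fixture, and the ReadSession the tests assert against, so that LZ4-compressed Arrow batches are only requested when the installed google-cloud-bigquery-storage exposes ArrowSerializationOptions. The helper name build_arrow_read_session is hypothetical.

from google.cloud import bigquery_storage


def build_arrow_read_session(table_path):
    # Hypothetical helper, not part of google-cloud-bigquery: mirror the fixture's
    # hasattr() check so compression is only requested when the installed
    # google-cloud-bigquery-storage version supports ArrowSerializationOptions.
    kwargs = {}
    if hasattr(bigquery_storage, "ArrowSerializationOptions"):
        kwargs["read_options"] = bigquery_storage.ReadSession.TableReadOptions(
            arrow_serialization_options=bigquery_storage.ArrowSerializationOptions(
                buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
            )
        )
    # Shaped like the expected_session objects asserted in the tests above.
    return bigquery_storage.ReadSession(
        table=table_path,
        data_format=bigquery_storage.DataFormat.ARROW,
        **kwargs,
    )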