From 6b5d89f1662d84dfd9ac56b06c283adc678b1819 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Feb 2022 16:00:51 -0600 Subject: [PATCH] fix: avoid `TypeError` when executing DML statements with `read_gbq` --- pandas_gbq/gbq.py | 6 ++++++ tests/system/test_read_gbq.py | 30 ++++++++++++++++++++++++++++++ tests/unit/test_gbq.py | 25 +++++++++++++++++++++---- 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 1157c37b..6c9b6804 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -410,6 +410,7 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError from google.cloud import bigquery + import pandas job_config = { "query": { @@ -495,6 +496,11 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): except self.http_error as ex: self.process_http_error(ex) + # Avoid attempting to download results from DML queries, which have no + # destination. + if query_reply.destination is None: + return pandas.DataFrame() + rows_iter = self.client.list_rows( query_reply.destination, max_results=max_results ) diff --git a/tests/system/test_read_gbq.py b/tests/system/test_read_gbq.py index a13e830f..65a65ff7 100644 --- a/tests/system/test_read_gbq.py +++ b/tests/system/test_read_gbq.py @@ -5,8 +5,10 @@ import collections import datetime import decimal +import random import db_dtypes +from google.cloud import bigquery import pandas import pandas.testing import pytest @@ -21,6 +23,21 @@ ) +@pytest.fixture +def writable_table( + bigquery_client: bigquery.Client, project_id: str, random_dataset: bigquery.Dataset +): + full_table_id = f"{project_id}.{random_dataset.dataset_id}.writable_table_{random.randrange(1_000_000_000)}" + table = bigquery.Table(full_table_id) + table.schema = [ + bigquery.SchemaField("field1", "STRING"), + bigquery.SchemaField("field2", "INTEGER"), + ] + bigquery_client.create_table(table) + yield full_table_id + bigquery_client.delete_table(full_table_id) + + @pytest.mark.parametrize(["use_bqstorage_api"], [(True,), (False,)]) @pytest.mark.parametrize( ["query", "expected", "use_bqstorage_apis"], @@ -605,3 +622,16 @@ def test_empty_dataframe(read_gbq, use_bqstorage_api): ) result = read_gbq(query, use_bqstorage_api=use_bqstorage_api) pandas.testing.assert_frame_equal(result, expected, check_index_type=False) + + +def test_dml_query(read_gbq, writable_table: str): + query = f""" + UPDATE `{writable_table}` + SET field1 = NULL + WHERE field1 = 'string'; + UPDATE `{writable_table}` + SET field2 = NULL + WHERE field2 < 0; + """ + result = read_gbq(query) + assert result is not None diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 74bec5ed..511e68d6 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -32,18 +32,23 @@ def mock_get_credentials_no_project(*args, **kwargs): return mock_credentials, None -@pytest.fixture(autouse=True) -def default_bigquery_client(mock_bigquery_client): +@pytest.fixture +def mock_query_job(): mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = "some-random-id" mock_query.state = "DONE" + return mock_query + + +@pytest.fixture(autouse=True) +def default_bigquery_client(mock_bigquery_client, mock_query_job): mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) mock_rows.total_rows = 1 mock_rows.__iter__.return_value = [(1,)] - mock_query.result.return_value = mock_rows + mock_query_job.result.return_value = mock_rows mock_bigquery_client.list_rows.return_value = mock_rows - mock_bigquery_client.query.return_value = mock_query + mock_bigquery_client.query.return_value = mock_query_job # Mock out SELECT 1 query results. def generate_schema(): @@ -718,3 +723,15 @@ def test_read_gbq_with_list_rows_error_translates_exception( ) def test_query_response_bytes(size_in_bytes, formatted_text): assert gbq.GbqConnector.sizeof_fmt(size_in_bytes) == formatted_text + + +def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job): + """ + Don't attempt to download results from a DML query / query with no results. + + https://github.com/googleapis/python-bigquery-pandas/issues/481 + """ + connector = _make_connector() + type(mock_query_job).destination = mock.PropertyMock(return_value=None) + connector.run_query("UPDATE tablename SET value = '';") + mock_bigquery_client.list_rows.assert_not_called()