
fix: avoid TypeError when executing DML statements with read_gbq #483


Merged: 3 commits, Feb 24, 2022
6 changes: 6 additions & 0 deletions pandas_gbq/gbq.py
@@ -410,6 +410,7 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
         from concurrent.futures import TimeoutError
         from google.auth.exceptions import RefreshError
         from google.cloud import bigquery
+        import pandas
 
         job_config = {
             "query": {
@@ -495,6 +496,11 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
         except self.http_error as ex:
             self.process_http_error(ex)
 
+        # Avoid attempting to download results from DML queries, which have no
+        # destination.
A collaborator commented:

This will also impact scripts; do you need additional handling for that case (for example, when the last line in a script is a SELECT)?

@tswast (collaborator, author) replied on Feb 24, 2022:

That's on the TODO list: #149 (comment)

The to_dataframe method in google-cloud-bigquery can call getQueryResults now, which should handle the scripting case. Though I'm actually not sure what happens when we try to use the BQ Storage API in google-cloud-bigquery with a script. Filed googleapis/python-bigquery#1148 to investigate this in google-cloud-bigquery.
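
For context, the scripting case under discussion is a multi-statement query whose final statement does produce rows; the parent job of such a script has no destination table, so it would also take the new empty-DataFrame path. A hypothetical sketch, not part of this change (table and project names are illustrative):

import pandas_gbq

# A script: the UPDATE yields no destination table, but the trailing SELECT
# returns rows a caller would expect back. Handling this case is deferred
# to the follow-up work referenced above.
query = """
UPDATE `my-project.my_dataset.my_table` SET field1 = NULL WHERE TRUE;
SELECT COUNT(*) AS row_count FROM `my-project.my_dataset.my_table`;
"""
df = pandas_gbq.read_gbq(query, project_id="my-project")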

+        if query_reply.destination is None:
+            return pandas.DataFrame()
+
         rows_iter = self.client.list_rows(
             query_reply.destination, max_results=max_results
         )
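Net effect: running a DML statement through read_gbq now returns an empty DataFrame instead of raising a TypeError when run_query hands a None destination to client.list_rows. A minimal sketch of the fixed behavior, assuming a hypothetical table (not from this PR):

import pandas_gbq

# Query jobs for DML statements have no destination table. Previously,
# run_query passed that None destination straight to client.list_rows(),
# which raised a TypeError (issue #481). With this fix, the statement
# still executes and an empty DataFrame comes back.
result = pandas_gbq.read_gbq(
    "UPDATE `my-project.my_dataset.my_table` SET field1 = NULL WHERE TRUE",
    project_id="my-project",
)
assert result.empty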
30 changes: 30 additions & 0 deletions tests/system/test_read_gbq.py
@@ -5,8 +5,10 @@
 import collections
 import datetime
 import decimal
+import random
 
 import db_dtypes
+from google.cloud import bigquery
 import pandas
 import pandas.testing
 import pytest
@@ -21,6 +23,21 @@
 )
 
 
+@pytest.fixture
+def writable_table(
+    bigquery_client: bigquery.Client, project_id: str, random_dataset: bigquery.Dataset
+):
+    full_table_id = f"{project_id}.{random_dataset.dataset_id}.writable_table_{random.randrange(1_000_000_000)}"
+    table = bigquery.Table(full_table_id)
+    table.schema = [
+        bigquery.SchemaField("field1", "STRING"),
+        bigquery.SchemaField("field2", "INTEGER"),
+    ]
+    bigquery_client.create_table(table)
+    yield full_table_id
+    bigquery_client.delete_table(full_table_id)
+
+
 @pytest.mark.parametrize(["use_bqstorage_api"], [(True,), (False,)])
 @pytest.mark.parametrize(
     ["query", "expected", "use_bqstorage_apis"],
@@ -605,3 +622,16 @@ def test_empty_dataframe(read_gbq, use_bqstorage_api):
     )
     result = read_gbq(query, use_bqstorage_api=use_bqstorage_api)
     pandas.testing.assert_frame_equal(result, expected, check_index_type=False)
+
+
+def test_dml_query(read_gbq, writable_table: str):
+    query = f"""
+    UPDATE `{writable_table}`
+    SET field1 = NULL
+    WHERE field1 = 'string';
+    UPDATE `{writable_table}`
+    SET field2 = NULL
+    WHERE field2 < 0;
+    """
+    result = read_gbq(query)
+    assert result is not None
25 changes: 21 additions & 4 deletions tests/unit/test_gbq.py
@@ -32,18 +32,23 @@ def mock_get_credentials_no_project(*args, **kwargs):
     return mock_credentials, None
 
 
-@pytest.fixture(autouse=True)
-def default_bigquery_client(mock_bigquery_client):
+@pytest.fixture
+def mock_query_job():
+    mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob)
+    mock_query.job_id = "some-random-id"
+    mock_query.state = "DONE"
+    return mock_query
+
+
+@pytest.fixture(autouse=True)
+def default_bigquery_client(mock_bigquery_client, mock_query_job):
     mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator)
     mock_rows.total_rows = 1
 
     mock_rows.__iter__.return_value = [(1,)]
-    mock_query.result.return_value = mock_rows
+    mock_query_job.result.return_value = mock_rows
     mock_bigquery_client.list_rows.return_value = mock_rows
-    mock_bigquery_client.query.return_value = mock_query
+    mock_bigquery_client.query.return_value = mock_query_job
 
     # Mock out SELECT 1 query results.
     def generate_schema():
@@ -718,3 +723,15 @@ def test_read_gbq_with_list_rows_error_translates_exception(
 )
 def test_query_response_bytes(size_in_bytes, formatted_text):
     assert gbq.GbqConnector.sizeof_fmt(size_in_bytes) == formatted_text
+
+
+def test_run_query_with_dml_query(mock_bigquery_client, mock_query_job):
+    """
+    Don't attempt to download results from a DML query / query with no results.
+
+    https://github.com/googleapis/python-bigquery-pandas/issues/481
+    """
+    connector = _make_connector()
+    type(mock_query_job).destination = mock.PropertyMock(return_value=None)
+    connector.run_query("UPDATE tablename SET value = '';")
+    mock_bigquery_client.list_rows.assert_not_called()
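
One note on the test above: a PropertyMock must be attached to the mock's type rather than to the instance, because Python looks up properties on the class; hence type(mock_query_job).destination = ... . A standalone sketch of the same pattern (names here are illustrative, not from the PR):

from unittest import mock

job = mock.MagicMock()

# Attaching the PropertyMock to the instance would store it as a plain
# attribute; attaching it to the mock's type makes attribute access invoke
# it like a real property. Each MagicMock instance gets its own subclass,
# so this does not leak into other mocks.
type(job).destination = mock.PropertyMock(return_value=None)

assert job.destination is None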