From 9a9d3fda24d41457cb0ab1c803388e096ab6afcc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 16:22:01 -0600 Subject: [PATCH 01/20] feat: accepts a table ID, which downloads the table without a query --- pandas_gbq/gbq.py | 38 +++++++++++++++++-------------------- tests/system/conftest.py | 19 +++++++++++++++++++ tests/system/test_to_gbq.py | 19 +++++++------------ 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 87c2327c..714c0995 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -374,7 +374,9 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): + def run_query( + self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + ): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -391,20 +393,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): job_config.update(config) if "query" in config and "query" in config["query"]: - if query is not None: + if query_or_table is not None: raise ValueError( "Query statement can't be specified " "inside config while it is specified " "as parameter" ) - query = config["query"].pop("query") + query_or_table = config["query"].pop("query") self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query, + query_or_table, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -639,7 +641,7 @@ def _cast_empty_df_dtypes(schema_fields, df): def read_gbq( - query, + query_or_table, project_id=None, index_col=None, col_order=None, @@ -663,17 +665,18 @@ def read_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. Parameters ---------- - query : str - SQL-Like Query to return data values. + query_or_table : str + SQL query to return data values. If the string is a table ID, fetch the + rows directly from the table without running a query. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. index_col : str, optional Name of result column to use for index in results DataFrame. @@ -688,9 +691,9 @@ def read_gbq( when getting user credentials. .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -740,13 +743,6 @@ def read_gbq( `__ permission on the project you are billing queries to. 
- **Note:** Due to a `known issue in the ``google-cloud-bigquery`` - package - `__ - (fixed in version 1.11.0), you must write your query results to a - destination table. To do this with ``read_gbq``, supply a - ``configuration`` dictionary. - This feature requires the ``google-cloud-bigquery-storage`` and ``pyarrow`` packages. @@ -830,7 +826,7 @@ def read_gbq( ) final_df = connector.run_query( - query, + query_or_table, configuration=configuration, max_results=max_results, progress_bar_type=progress_bar_type, @@ -884,7 +880,7 @@ def to_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. @@ -897,7 +893,7 @@ def to_gbq( Name of table to be written, in the form ``dataset.tablename`` or ``project.dataset.tablename``. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 6ac55220..4ba8bf31 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. import os +import functools import pathlib from google.cloud import bigquery @@ -56,6 +57,24 @@ def project(project_id): return project_id +@pytest.fixture +def to_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.to_gbq, project_id=project_id, credentials=credentials + ) + + +@pytest.fixture +def read_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.read_gbq, project_id=project_id, credentials=credentials + ) + + @pytest.fixture() def random_dataset_id(bigquery_client: bigquery.Client, project_id: str): dataset_id = prefixer.create_prefix() diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 4421f3be..f8d9c7f7 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -5,7 +5,6 @@ import datetime import decimal import collections -import functools import random import db_dtypes @@ -23,12 +22,8 @@ def api_method(request): @pytest.fixture -def method_under_test(credentials, project_id): - import pandas_gbq - - return functools.partial( - pandas_gbq.to_gbq, project_id=project_id, credentials=credentials - ) +def method_under_test(to_gbq): + return to_gbq SeriesRoundTripTestCase = collections.namedtuple( @@ -98,7 +93,7 @@ def method_under_test(credentials, project_id): def test_series_round_trip( method_under_test, random_dataset_id, - bigquery_client, + read_gbq, input_series, api_method, api_methods, @@ -114,7 +109,7 @@ def test_series_round_trip( ) method_under_test(df, table_id, api_method=api_method) - round_trip = bigquery_client.list_rows(table_id).to_dataframe() + round_trip = read_gbq(table_id) round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) pandas.testing.assert_series_equal( round_trip_series, input_series, check_exact=True, check_names=False, @@ -196,8 +191,8 @@ def test_series_round_trip( ) def test_dataframe_round_trip_with_table_schema( method_under_test, + read_gbq, random_dataset_id, - bigquery_client, input_df, expected_df, table_schema, @@ -212,8 +207,8 @@ def test_dataframe_round_trip_with_table_schema( method_under_test( input_df, table_id, 
table_schema=table_schema, api_method=api_method ) - round_trip = bigquery_client.list_rows(table_id).to_dataframe( - dtypes=dict(zip(expected_df.columns, expected_df.dtypes)) + round_trip = read_gbq( + table_id, dtypes=dict(zip(expected_df.columns, expected_df.dtypes)), ) round_trip.sort_values("row_num", inplace=True) pandas.testing.assert_frame_equal(expected_df, round_trip) From 6adf2332fa7726532872a68e3283e004f9c3c1db Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 17:12:17 -0600 Subject: [PATCH 02/20] add todo for next steps --- pandas_gbq/gbq.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 714c0995..d2cc38f9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -506,13 +506,18 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: + # TODO: This is the only difference between table ID and query job. + # But should I refactor for + # https://github.com/googleapis/python-bigquery-pandas/issues/339 + # now? query_job.result() # Get the table schema, so that we can list rows. destination = self.client.get_table(query_job.destination) rows_iter = self.client.list_rows(destination, max_results=max_results) - schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) + # ENDTODO: This is the only difference between table ID and + conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From 9b1eb0dc709beccdd59058874d6b9a7339da5864 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 14:28:34 -0600 Subject: [PATCH 03/20] add unit test for table ID read_gbq --- tests/unit/conftest.py | 17 ++++++++++++++--- tests/unit/test_gbq.py | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index cfa1e819..513df4b9 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -26,18 +26,29 @@ def mock_bigquery_client(monkeypatch): # Constructor returns the mock itself, so this mock can be treated as the # constructor or the instance. mock_client.return_value = mock_client - mock_schema = [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] - # Mock out SELECT 1 query results. + mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = "some-random-id" mock_query.state = "DONE" mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) mock_rows.total_rows = 1 - mock_rows.schema = mock_schema + mock_rows.__iter__.return_value = [(1,)] mock_query.result.return_value = mock_rows + mock_client.list_rows.return_value = mock_rows mock_client.query.return_value = mock_query # Mock table creation. monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client) mock_client.reset_mock() + + # Mock out SELECT 1 query results. 
+ def generate_schema(): + query = mock_client.query.call_args[0][0] + if query == "SELECT 1 AS int_col": + return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] + else: + return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] + + type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 8784a98b..bc12c47c 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -292,9 +292,10 @@ def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): gbq.read_gbq("SELECT 1", dialect="standard") -def test_read_gbq_with_inferred_project_id(monkeypatch): +def test_read_gbq_with_inferred_project_id(mock_bigquery_client): df = gbq.read_gbq("SELECT 1", dialect="standard") assert df is not None + mock_bigquery_client.query.assert_called_once() def test_read_gbq_with_inferred_project_id_from_service_account_credentials( @@ -505,3 +506,19 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args assert to_dataframe_kwargs["progress_bar_type"] == "foobar" + + +def test_read_gbq_bypasses_query_with_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table" + ) From ec9ddaff46e1911e24fbba4cfc66e743515dde63 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 15:28:44 -0600 Subject: [PATCH 04/20] add helper for is_query --- pandas_gbq/gbq.py | 5 +++++ tests/unit/test_gbq.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index bba98f57..07fc6852 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. 
import logging +import re import time import warnings from datetime import datetime @@ -64,6 +65,10 @@ def _test_google_api_imports(): raise ImportError("pandas-gbq requires google-cloud-bigquery") from ex +def _is_query(query_or_table: str) -> bool: + return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None + + class DatasetCreationError(ValueError): """ Raised when the create dataset method fails diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index bc12c47c..496486ef 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -82,6 +82,19 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): assert result == {"x": expected} +@pytest.mark.parametrize( + ["query_or_table", "expected"], + [ + ("SELECT 1", True), + ("dataset.table", False), + ("project-id.dataset.table", False), + ], +) +def test__is_query(query_or_table, expected): + result = gbq._is_query(query_or_table) + assert result == expected + + def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client): gbq._test_google_api_imports() connector = _make_connector() From 9cc7c74c3d2f76de9ba1beb6fd15156101717be6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:07:44 -0600 Subject: [PATCH 05/20] implement read_gbq with table id --- pandas_gbq/gbq.py | 85 ++++++++++++++++++++++++++++-------------- tests/unit/conftest.py | 8 +++- tests/unit/test_gbq.py | 37 +++++++++++++++++- 3 files changed, 99 insertions(+), 31 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 07fc6852..6a8b6788 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -379,9 +379,26 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query( - self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + def download_table( + self, table_id, max_results=None, progress_bar_type=None, dtypes=None ): + self._start_timer() + + try: + # Get the table schema, so that we can list rows. + destination = self.client.get_table(table_id) + rows_iter = self.client.list_rows(destination, max_results=max_results) + except self.http_error as ex: + self.process_http_error(ex) + + return self._download_results( + rows_iter, + max_results=max_results, + progress_bar_type=progress_bar_type, + user_dtypes=dtypes, + ) + + def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -397,21 +414,12 @@ def run_query( if config is not None: job_config.update(config) - if "query" in config and "query" in config["query"]: - if query_or_table is not None: - raise ValueError( - "Query statement can't be specified " - "inside config while it is specified " - "as parameter" - ) - query_or_table = config["query"].pop("query") - self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query_or_table, + query, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -471,15 +479,25 @@ def run_query( ) dtypes = kwargs.get("dtypes") + + # Ensure destination is populated. + try: + query_reply.result() + except self.http_error as ex: + self.process_http_error(ex) + + # Get the table schema, so that we can list rows. 
+ destination = self.client.get_table(query_reply.destination) + rows_iter = self.client.list_rows(destination, max_results=max_results) return self._download_results( - query_reply, + rows_iter, max_results=max_results, progress_bar_type=progress_bar_type, user_dtypes=dtypes, ) def _download_results( - self, query_job, max_results=None, progress_bar_type=None, user_dtypes=None, + self, rows_iter, max_results=None, progress_bar_type=None, user_dtypes=None, ): # No results are desired, so don't bother downloading anything. if max_results == 0: @@ -511,14 +529,6 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: - # TODO: This is the only difference between table ID and query job. - # But should I refactor for - # https://github.com/googleapis/python-bigquery-pandas/issues/339 - # now? - query_job.result() - # Get the table schema, so that we can list rows. - destination = self.client.get_table(query_job.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) # ENDTODO: This is the only difference between table ID and @@ -829,6 +839,15 @@ def read_gbq( if dialect not in ("legacy", "standard"): raise ValueError("'{0}' is not valid for dialect".format(dialect)) + if configuration and "query" in configuration and "query" in configuration["query"]: + if query_or_table is not None: + raise ValueError( + "Query statement can't be specified " + "inside config while it is specified " + "as parameter" + ) + query_or_table = configuration["query"].pop("query") + connector = GbqConnector( project_id, reauth=reauth, @@ -840,13 +859,21 @@ def read_gbq( use_bqstorage_api=use_bqstorage_api, ) - final_df = connector.run_query( - query_or_table, - configuration=configuration, - max_results=max_results, - progress_bar_type=progress_bar_type, - dtypes=dtypes, - ) + if _is_query(query_or_table): + final_df = connector.run_query( + query_or_table, + configuration=configuration, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) + else: + final_df = connector.download_table( + query_or_table, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) # Reindex the DataFrame on the provided column if index_col is not None: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 513df4b9..3f0c5e53 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -43,7 +43,7 @@ def mock_bigquery_client(monkeypatch): # Mock out SELECT 1 query results. def generate_schema(): - query = mock_client.query.call_args[0][0] + query = mock_client.query.call_args[0][0] if mock_client.query.call_args else "" if query == "SELECT 1 AS int_col": return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] else: @@ -51,4 +51,10 @@ def generate_schema(): type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + # Mock out get_table. 
+ def get_table(table_ref_or_id, **kwargs): + return google.cloud.bigquery.Table(table_ref_or_id) + + mock_client.get_table.side_effect = get_table + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 496486ef..480f0000 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -521,13 +521,14 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti assert to_dataframe_kwargs["progress_bar_type"] == "foobar" -def test_read_gbq_bypasses_query_with_table_id( +def test_read_gbq_with_full_table_id( mock_bigquery_client, mock_service_account_credentials ): mock_service_account_credentials.project_id = "service_account_project_id" df = gbq.read_gbq( "my-project.my_dataset.read_gbq_table", credentials=mock_service_account_credentials, + project_id="param-project", ) assert df is not None @@ -535,3 +536,37 @@ def test_read_gbq_bypasses_query_with_table_id( mock_bigquery_client.list_rows.assert_called_with( "my-project.my_dataset.read_gbq_table" ) + + +def test_read_gbq_with_partial_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "param-project.my_dataset.read_gbq_table" + ) + + +def test_read_gbq_bypasses_query_with_table_id_and_max_results( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + max_results=11, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table", max_results=11 + ) From dd51ad8e7d9c51301e33ec2422fe6d80013e7322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:49:49 -0600 Subject: [PATCH 06/20] fix remaining tests, don't localalize out-of-bounds timestamp columns --- pandas_gbq/gbq.py | 5 ++++- pandas_gbq/timestamp.py | 8 +++++++- tests/unit/test_gbq.py | 7 ++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6a8b6788..247df17c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -386,7 +386,10 @@ def download_table( try: # Get the table schema, so that we can list rows. - destination = self.client.get_table(table_id) + table_ref = bigquery.TableReference.from_string( + table_id, default_project=self.project_id + ) + destination = self.client.get_table(table_ref) rows_iter = self.client.list_rows(destination, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) diff --git a/pandas_gbq/timestamp.py b/pandas_gbq/timestamp.py index e0b41475..c6bb6d93 100644 --- a/pandas_gbq/timestamp.py +++ b/pandas_gbq/timestamp.py @@ -7,6 +7,8 @@ Private module. """ +import pandas.api.types + def localize_df(df, schema_fields): """Localize any TIMESTAMP columns to tz-aware type. 
@@ -38,7 +40,11 @@ def localize_df(df, schema_fields): if "mode" in field and field["mode"].upper() == "REPEATED": continue - if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None: + if ( + field["type"].upper() == "TIMESTAMP" + and pandas.api.types.is_datetime64_ns_dtype(df.dtypes[column]) + and df[column].dt.tz is None + ): df[column] = df[column].dt.tz_localize("UTC") return df diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 480f0000..7593eea5 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,7 +535,7 @@ def test_read_gbq_with_full_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table" + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -551,7 +552,7 @@ def test_read_gbq_with_partial_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "param-project.my_dataset.read_gbq_table" + bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -568,5 +569,5 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table", max_results=11 + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 ) From e1ad679671f920b5964b7b987d9ceb3b36dca10e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:52:12 -0600 Subject: [PATCH 07/20] Update pandas_gbq/gbq.py --- pandas_gbq/gbq.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 247df17c..fdd4dcc6 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -534,8 +534,6 @@ def _download_results( try: schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) - # ENDTODO: This is the only difference between table ID and - conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From d29bc2ac072f0c1673944557b0dc53c12487a99a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:30:35 -0600 Subject: [PATCH 08/20] fix 3.7 unit tests --- noxfile.py | 2 +- tests/unit/test_gbq.py | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/noxfile.py b/noxfile.py index df3378bf..7530c68a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=88") + session.run("coverage", "report", "--show-missing", "--fail-under=91") session.run("coverage", "erase") diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 7593eea5..142771d1 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,7 +8,6 @@ import datetime from unittest import mock -from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,9 +533,10 @@ def test_read_gbq_with_full_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_with_partial_table_id( @@ -551,9 +551,10 @@ def test_read_gbq_with_partial_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "param-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_bypasses_query_with_table_id_and_max_results( @@ -568,6 +569,9 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] + assert sent_max_results == 11 From cb8f24f5153535fdff344f2b3837b10222b4e322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:32:56 -0600 Subject: [PATCH 09/20] correct coverage --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 7530c68a..398b4dc2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=91") + session.run("coverage", "report", "--show-missing", "--fail-under=89") session.run("coverage", "erase") From 56b73b213444955b28041f1822c6ceccee93916c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:34:30 -0600 Subject: [PATCH 10/20] skip coverage for optional test skip --- tests/unit/test_gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 142771d1..0c27dd76 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -487,7 +487,7 @@ def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_crede def test_read_gbq_use_bqstorage_api( mock_bigquery_client, mock_service_account_credentials ): - if not FEATURES.bigquery_has_bqstorage: + if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER pytest.skip("requires BigQuery Storage API") mock_service_account_credentials.project_id = "service_account_project_id" From 8a61e97e31d5fd5a29898554f52cb66c422f12e9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:47:34 -0600 Subject: [PATCH 11/20] fix docs build --- pandas_gbq/gbq.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index fdd4dcc6..41cb2f5b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -712,14 +712,14 @@ def read_gbq( reauth : boolean, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. - auth_local_webserver : boolean, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + auth_local_webserver : bool, default False + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -954,13 +954,13 @@ def to_gbq( ``'append'`` If table exists, insert data. Create if does not exist. auth_local_webserver : bool, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. 
versionadded:: 0.2.0 table_schema : list of dicts, optional From 3f7900bf184a10337c9bab19fa703211650da1df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:56:20 -0600 Subject: [PATCH 12/20] improve test coverage for error case --- tests/unit/test_gbq.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 0c27dd76..9a0e8ce3 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +import google.api_core.exceptions import numpy import pandas from pandas import DataFrame @@ -575,3 +576,17 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert sent_table.table_id == "read_gbq_table" sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] assert sent_max_results == 11 + + +def test_read_gbq_with_list_rows_error_translates_exception( + mock_bigquery_client, mock_service_account_credentials +): + mock_bigquery_client.list_rows.side_effect = ( + google.api_core.exceptions.NotFound("table not found"), + ) + + with pytest.raises(gbq.GenericGBQException, match="table not found"): + gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) From 3c53f1f697265a9034b00fadfe99f525100f8eae Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 13 Dec 2021 10:21:46 -0600 Subject: [PATCH 13/20] as of google-cloud-bigquery 1.11.0, get_table before list_rows is unnecessary --- pandas_gbq/gbq.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 41cb2f5b..1ba64057 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -385,12 +385,10 @@ def download_table( self._start_timer() try: - # Get the table schema, so that we can list rows. table_ref = bigquery.TableReference.from_string( table_id, default_project=self.project_id ) - destination = self.client.get_table(table_ref) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows(table_ref, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) @@ -489,9 +487,9 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): except self.http_error as ex: self.process_http_error(ex) - # Get the table schema, so that we can list rows. 
- destination = self.client.get_table(query_reply.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows( + query_reply.destination, max_results=max_results + ) return self._download_results( rows_iter, max_results=max_results, From 5ce125f13934b27a16889ae22aabf65d77837178 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:42:21 -0600 Subject: [PATCH 14/20] tests with whitespace --- tests/unit/test_gbq.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 9a0e8ce3..df9241bc 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -87,8 +87,14 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): ["query_or_table", "expected"], [ ("SELECT 1", True), + ("SELECT\n1", True), + ("SELECT\t1", True), ("dataset.table", False), + (" dataset.table ", False), + ("\r\ndataset.table\r\n", False), ("project-id.dataset.table", False), + (" project-id.dataset.table ", False), + ("\r\nproject-id.dataset.table\r\n", False), ], ) def test__is_query(query_or_table, expected): From ea660f41e7717a20038119a17890795c3cfedb38 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:49:25 -0600 Subject: [PATCH 15/20] type annotations --- pandas_gbq/gbq.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 1ba64057..cc6bef1f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -7,9 +7,17 @@ import time import warnings from datetime import datetime +import typing +from typing import Dict, Optional, Union import numpy as np +# Only import at module-level at type checking time to avoid circular +# dependencies in the pandas package, which has an optional dependency on +# pandas-gbq. +if typing.TYPE_CHECKING: + import pandas + # Required dependencies, but treat as optional so that _test_google_api_imports # can provide a better error message. 
try: @@ -380,8 +388,14 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) def download_table( - self, table_id, max_results=None, progress_bar_type=None, dtypes=None - ): + self, + table_id: str, + max_results: Optional[int] = None, + progress_bar_type: Optional[str] = None, + dtypes: Dict[ + str, Union[str, "pandas.api.extensions.ExtensionDtype", np.dtype] + ] = None, + ) -> "pandas.DataFrame": self._start_timer() try: From 670499150b81c1598d29db451fded75b51dbb057 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 17:26:48 -0600 Subject: [PATCH 16/20] improve coverage in owlbot config --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 5ef93de7..9849f98f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=88, + cov_level=89, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ From 9a1ca1651fbfb1612d710f8d0177aa43a12ed187 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 20 Dec 2021 23:29:09 +0000 Subject: [PATCH 17/20] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index ba50bf32..88b85d03 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 88 +fail_under = 89 show_missing = True exclude_lines = # Re-enable the standard pragma From a9075df20e378ee507d1e28019ce5ecfb6741ada Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 13:36:42 -0600 Subject: [PATCH 18/20] boost coverage --- pandas_gbq/load.py | 7 +++++- pandas_gbq/schema.py | 2 ++ tests/unit/test_load.py | 56 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 315ad5cd..588a6719 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -185,6 +185,11 @@ def load_csv_from_file( chunksize: Optional[int], schema: Optional[Dict[str, Any]], ): + """Manually encode a DataFrame to CSV and use the buffer in a load job. + + This method is needed for writing with google-cloud-bigquery versions that + don't implment load_table_from_dataframe with the CSV serialization format. + """ if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) @@ -203,7 +208,7 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk,) + return load_csv(dataframe, chunksize, bq_schema, load_chunk) def load_chunks( diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index e2f97455..118e00f0 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -101,6 +101,8 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. 
} fields = [] diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 8e18cfb9..3f32bff9 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -95,6 +95,62 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 +def test_load_csv_from_file_generates_schema(mock_bigquery_client): + import google.cloud.bigquery + + df = pandas.DataFrame( + { + "int_col": [1, 2, 3], + "bool_col": [True, False, True], + "float_col": [0.0, 1.25, -2.75], + "string_col": ["a", "b", "c"], + "datetime_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ), + "timestamp_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ).dt.tz_localize(datetime.timezone.utc), + } + ) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + _ = list( + load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) + ) + + mock_load = mock_bigquery_client.load_table_from_file + assert mock_load.called + _, kwargs = mock_load.call_args + assert "job_config" in kwargs + sent_schema = kwargs["job_config"].schema + assert sent_schema[0].name == "int_col" + assert sent_schema[0].field_type == "INTEGER" + assert sent_schema[1].name == "bool_col" + assert sent_schema[1].field_type == "BOOLEAN" + assert sent_schema[2].name == "float_col" + assert sent_schema[2].field_type == "FLOAT" + assert sent_schema[3].name == "string_col" + assert sent_schema[3].field_type == "STRING" + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. + assert sent_schema[4].name == "datetime_col" + assert sent_schema[4].field_type == "TIMESTAMP" + assert sent_schema[5].name == "timestamp_col" + assert sent_schema[5].field_type == "TIMESTAMP" + + @pytest.mark.parametrize( ["bigquery_has_from_dataframe_with_csv", "api_method"], [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], From ed3f9d9e624be918587927738c961191e46e383c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 11:15:31 -0600 Subject: [PATCH 19/20] Revert "boost coverage" This reverts commit a9075df20e378ee507d1e28019ce5ecfb6741ada. --- pandas_gbq/load.py | 7 +----- pandas_gbq/schema.py | 2 -- tests/unit/test_load.py | 56 ----------------------------------------- 3 files changed, 1 insertion(+), 64 deletions(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 588a6719..315ad5cd 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -185,11 +185,6 @@ def load_csv_from_file( chunksize: Optional[int], schema: Optional[Dict[str, Any]], ): - """Manually encode a DataFrame to CSV and use the buffer in a load job. - - This method is needed for writing with google-cloud-bigquery versions that - don't implment load_table_from_dataframe with the CSV serialization format. 
- """ if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) @@ -208,7 +203,7 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk) + return load_csv(dataframe, chunksize, bq_schema, load_chunk,) def load_chunks( diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index 118e00f0..e2f97455 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -101,8 +101,6 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", - # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. } fields = [] diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 3f32bff9..8e18cfb9 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -95,62 +95,6 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 -def test_load_csv_from_file_generates_schema(mock_bigquery_client): - import google.cloud.bigquery - - df = pandas.DataFrame( - { - "int_col": [1, 2, 3], - "bool_col": [True, False, True], - "float_col": [0.0, 1.25, -2.75], - "string_col": ["a", "b", "c"], - "datetime_col": pandas.Series( - [ - "2021-12-21 13:28:40.123789", - "2000-01-01 11:10:09", - "2040-10-31 23:59:59.999999", - ], - dtype="datetime64[ns]", - ), - "timestamp_col": pandas.Series( - [ - "2021-12-21 13:28:40.123789", - "2000-01-01 11:10:09", - "2040-10-31 23:59:59.999999", - ], - dtype="datetime64[ns]", - ).dt.tz_localize(datetime.timezone.utc), - } - ) - destination = google.cloud.bigquery.TableReference.from_string( - "my-project.my_dataset.my_table" - ) - - _ = list( - load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) - ) - - mock_load = mock_bigquery_client.load_table_from_file - assert mock_load.called - _, kwargs = mock_load.call_args - assert "job_config" in kwargs - sent_schema = kwargs["job_config"].schema - assert sent_schema[0].name == "int_col" - assert sent_schema[0].field_type == "INTEGER" - assert sent_schema[1].name == "bool_col" - assert sent_schema[1].field_type == "BOOLEAN" - assert sent_schema[2].name == "float_col" - assert sent_schema[2].field_type == "FLOAT" - assert sent_schema[3].name == "string_col" - assert sent_schema[3].field_type == "STRING" - # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. - assert sent_schema[4].name == "datetime_col" - assert sent_schema[4].field_type == "TIMESTAMP" - assert sent_schema[5].name == "timestamp_col" - assert sent_schema[5].field_type == "TIMESTAMP" - - @pytest.mark.parametrize( ["bigquery_has_from_dataframe_with_csv", "api_method"], [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], From b9b017cc85cf8f5b28666f6918f6efb7a8a25fc7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 12:32:07 -0600 Subject: [PATCH 20/20] don't cover type checking only code, more generic type annotation --- pandas_gbq/gbq.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index cc6bef1f..5dcc3fd0 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -8,14 +8,14 @@ import warnings from datetime import datetime import typing -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union import numpy as np # Only import at module-level at type checking time to avoid circular # dependencies in the pandas package, which has an optional dependency on # pandas-gbq. 
-if typing.TYPE_CHECKING: +if typing.TYPE_CHECKING: # pragma: NO COVER import pandas # Required dependencies, but treat as optional so that _test_google_api_imports @@ -392,9 +392,7 @@ def download_table( table_id: str, max_results: Optional[int] = None, progress_bar_type: Optional[str] = None, - dtypes: Dict[ - str, Union[str, "pandas.api.extensions.ExtensionDtype", np.dtype] - ] = None, + dtypes: Optional[Dict[str, Union[str, Any]]] = None, ) -> "pandas.DataFrame": self._start_timer()
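
---

Taken together, this series makes read_gbq dispatch on its first argument: a string
containing whitespace is treated as a SQL query and routed through
GbqConnector.run_query, while a whitespace-free string is treated as a table ID and
routed through the new GbqConnector.download_table, which lists rows directly instead
of starting a query job. The sketch below is a minimal illustration of the resulting
behavior, assuming the series is applied on top of pandas-gbq with
google-cloud-bigquery >= 1.11.0 and that credentials are available from the
environment; the project, dataset, and table names are placeholders.

    import pandas_gbq

    # Whitespace marks the string as SQL (_is_query matches r"\s" after
    # strip), so this path starts a query job via GbqConnector.run_query.
    df_from_query = pandas_gbq.read_gbq(
        "SELECT 1 AS int_col",
        project_id="my-project",  # optional when inferable from the environment
    )

    # No whitespace, so the string is treated as a table ID and handed to
    # GbqConnector.download_table, which calls client.list_rows directly;
    # no query job is created.
    df_from_table = pandas_gbq.read_gbq(
        "my-project.my_dataset.my_table",
        max_results=10,  # forwarded to list_rows
    )

    # A partial table ID is resolved against project_id (or the default
    # project) via bigquery.TableReference.from_string.
    df_partial = pandas_gbq.read_gbq("my_dataset.my_table", project_id="my-project")

One design consequence worth noting: because the table-ID path skips the query job,
query-only options such as configuration and dialect have no effect on it, while
max_results, progress_bar_type, and dtypes are honored on both paths through the
shared _download_results helper.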