From 9a9d3fda24d41457cb0ab1c803388e096ab6afcc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 16:22:01 -0600 Subject: [PATCH 01/20] feat: accepts a table ID, which downloads the table without a query --- pandas_gbq/gbq.py | 38 +++++++++++++++++-------------------- tests/system/conftest.py | 19 +++++++++++++++++++ tests/system/test_to_gbq.py | 19 +++++++------------ 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 87c2327c..714c0995 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -374,7 +374,9 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): + def run_query( + self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + ): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -391,20 +393,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): job_config.update(config) if "query" in config and "query" in config["query"]: - if query is not None: + if query_or_table is not None: raise ValueError( "Query statement can't be specified " "inside config while it is specified " "as parameter" ) - query = config["query"].pop("query") + query_or_table = config["query"].pop("query") self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query, + query_or_table, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -639,7 +641,7 @@ def _cast_empty_df_dtypes(schema_fields, df): def read_gbq( - query, + query_or_table, project_id=None, index_col=None, col_order=None, @@ -663,17 +665,18 @@ def read_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. Parameters ---------- - query : str - SQL-Like Query to return data values. + query_or_table : str + SQL query to return data values. If the string is a table ID, fetch the + rows directly from the table without running a query. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. index_col : str, optional Name of result column to use for index in results DataFrame. @@ -688,9 +691,9 @@ def read_gbq( when getting user credentials. .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -740,13 +743,6 @@ def read_gbq( `__ permission on the project you are billing queries to. 
- **Note:** Due to a `known issue in the ``google-cloud-bigquery`` - package - `__ - (fixed in version 1.11.0), you must write your query results to a - destination table. To do this with ``read_gbq``, supply a - ``configuration`` dictionary. - This feature requires the ``google-cloud-bigquery-storage`` and ``pyarrow`` packages. @@ -830,7 +826,7 @@ def read_gbq( ) final_df = connector.run_query( - query, + query_or_table, configuration=configuration, max_results=max_results, progress_bar_type=progress_bar_type, @@ -884,7 +880,7 @@ def to_gbq( This method uses the Google Cloud client library to make requests to Google BigQuery, documented `here - `__. + `__. See the :ref:`How to authenticate with Google BigQuery ` guide for authentication instructions. @@ -897,7 +893,7 @@ def to_gbq( Name of table to be written, in the form ``dataset.tablename`` or ``project.dataset.tablename``. project_id : str, optional - Google BigQuery Account project ID. Optional when available from + Google Cloud Platform project ID. Optional when available from the environment. chunksize : int, optional Number of rows to be inserted in each chunk from the dataframe. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 6ac55220..4ba8bf31 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. import os +import functools import pathlib from google.cloud import bigquery @@ -56,6 +57,24 @@ def project(project_id): return project_id +@pytest.fixture +def to_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.to_gbq, project_id=project_id, credentials=credentials + ) + + +@pytest.fixture +def read_gbq(credentials, project_id): + import pandas_gbq + + return functools.partial( + pandas_gbq.read_gbq, project_id=project_id, credentials=credentials + ) + + @pytest.fixture() def random_dataset_id(bigquery_client: bigquery.Client, project_id: str): dataset_id = prefixer.create_prefix() diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 4421f3be..f8d9c7f7 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -5,7 +5,6 @@ import datetime import decimal import collections -import functools import random import db_dtypes @@ -23,12 +22,8 @@ def api_method(request): @pytest.fixture -def method_under_test(credentials, project_id): - import pandas_gbq - - return functools.partial( - pandas_gbq.to_gbq, project_id=project_id, credentials=credentials - ) +def method_under_test(to_gbq): + return to_gbq SeriesRoundTripTestCase = collections.namedtuple( @@ -98,7 +93,7 @@ def method_under_test(credentials, project_id): def test_series_round_trip( method_under_test, random_dataset_id, - bigquery_client, + read_gbq, input_series, api_method, api_methods, @@ -114,7 +109,7 @@ def test_series_round_trip( ) method_under_test(df, table_id, api_method=api_method) - round_trip = bigquery_client.list_rows(table_id).to_dataframe() + round_trip = read_gbq(table_id) round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) pandas.testing.assert_series_equal( round_trip_series, input_series, check_exact=True, check_names=False, @@ -196,8 +191,8 @@ def test_series_round_trip( ) def test_dataframe_round_trip_with_table_schema( method_under_test, + read_gbq, random_dataset_id, - bigquery_client, input_df, expected_df, table_schema, @@ -212,8 +207,8 @@ def test_dataframe_round_trip_with_table_schema( method_under_test( input_df, table_id, 
table_schema=table_schema, api_method=api_method ) - round_trip = bigquery_client.list_rows(table_id).to_dataframe( - dtypes=dict(zip(expected_df.columns, expected_df.dtypes)) + round_trip = read_gbq( + table_id, dtypes=dict(zip(expected_df.columns, expected_df.dtypes)), ) round_trip.sort_values("row_num", inplace=True) pandas.testing.assert_frame_equal(expected_df, round_trip) From 6adf2332fa7726532872a68e3283e004f9c3c1db Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Dec 2021 17:12:17 -0600 Subject: [PATCH 02/20] add todo for next steps --- pandas_gbq/gbq.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 714c0995..d2cc38f9 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -506,13 +506,18 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: + # TODO: This is the only difference between table ID and query job. + # But should I refactor for + # https://github.com/googleapis/python-bigquery-pandas/issues/339 + # now? query_job.result() # Get the table schema, so that we can list rows. destination = self.client.get_table(query_job.destination) rows_iter = self.client.list_rows(destination, max_results=max_results) - schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) + # ENDTODO: This is the only difference between table ID and + conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From 9b1eb0dc709beccdd59058874d6b9a7339da5864 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 14:28:34 -0600 Subject: [PATCH 03/20] add unit test for table ID read_gbq --- tests/unit/conftest.py | 17 ++++++++++++++--- tests/unit/test_gbq.py | 19 ++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index cfa1e819..513df4b9 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -26,18 +26,29 @@ def mock_bigquery_client(monkeypatch): # Constructor returns the mock itself, so this mock can be treated as the # constructor or the instance. mock_client.return_value = mock_client - mock_schema = [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] - # Mock out SELECT 1 query results. + mock_query = mock.create_autospec(google.cloud.bigquery.QueryJob) mock_query.job_id = "some-random-id" mock_query.state = "DONE" mock_rows = mock.create_autospec(google.cloud.bigquery.table.RowIterator) mock_rows.total_rows = 1 - mock_rows.schema = mock_schema + mock_rows.__iter__.return_value = [(1,)] mock_query.result.return_value = mock_rows + mock_client.list_rows.return_value = mock_rows mock_client.query.return_value = mock_query # Mock table creation. monkeypatch.setattr(google.cloud.bigquery, "Client", mock_client) mock_client.reset_mock() + + # Mock out SELECT 1 query results. 
+ def generate_schema(): + query = mock_client.query.call_args[0][0] + if query == "SELECT 1 AS int_col": + return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] + else: + return [google.cloud.bigquery.SchemaField("_f0", "INTEGER")] + + type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 8784a98b..bc12c47c 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -292,9 +292,10 @@ def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch): gbq.read_gbq("SELECT 1", dialect="standard") -def test_read_gbq_with_inferred_project_id(monkeypatch): +def test_read_gbq_with_inferred_project_id(mock_bigquery_client): df = gbq.read_gbq("SELECT 1", dialect="standard") assert df is not None + mock_bigquery_client.query.assert_called_once() def test_read_gbq_with_inferred_project_id_from_service_account_credentials( @@ -505,3 +506,19 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti _, to_dataframe_kwargs = mock_list_rows.to_dataframe.call_args assert to_dataframe_kwargs["progress_bar_type"] == "foobar" + + +def test_read_gbq_bypasses_query_with_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table" + ) From ec9ddaff46e1911e24fbba4cfc66e743515dde63 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 9 Dec 2021 15:28:44 -0600 Subject: [PATCH 04/20] add helper for is_query --- pandas_gbq/gbq.py | 5 +++++ tests/unit/test_gbq.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index bba98f57..07fc6852 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -3,6 +3,7 @@ # license that can be found in the LICENSE file. 
import logging +import re import time import warnings from datetime import datetime @@ -64,6 +65,10 @@ def _test_google_api_imports(): raise ImportError("pandas-gbq requires google-cloud-bigquery") from ex +def _is_query(query_or_table: str) -> bool: + return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None + + class DatasetCreationError(ValueError): """ Raised when the create dataset method fails diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index bc12c47c..496486ef 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -82,6 +82,19 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): assert result == {"x": expected} +@pytest.mark.parametrize( + ["query_or_table", "expected"], + [ + ("SELECT 1", True), + ("dataset.table", False), + ("project-id.dataset.table", False), + ], +) +def test__is_query(query_or_table, expected): + result = gbq._is_query(query_or_table) + assert result == expected + + def test_GbqConnector_get_client_w_old_bq(monkeypatch, mock_bigquery_client): gbq._test_google_api_imports() connector = _make_connector() From 9cc7c74c3d2f76de9ba1beb6fd15156101717be6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:07:44 -0600 Subject: [PATCH 05/20] implement read_gbq with table id --- pandas_gbq/gbq.py | 85 ++++++++++++++++++++++++++++-------------- tests/unit/conftest.py | 8 +++- tests/unit/test_gbq.py | 37 +++++++++++++++++- 3 files changed, 99 insertions(+), 31 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 07fc6852..6a8b6788 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -379,9 +379,26 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) - def run_query( - self, query_or_table, max_results=None, progress_bar_type=None, **kwargs + def download_table( + self, table_id, max_results=None, progress_bar_type=None, dtypes=None ): + self._start_timer() + + try: + # Get the table schema, so that we can list rows. + destination = self.client.get_table(table_id) + rows_iter = self.client.list_rows(destination, max_results=max_results) + except self.http_error as ex: + self.process_http_error(ex) + + return self._download_results( + rows_iter, + max_results=max_results, + progress_bar_type=progress_bar_type, + user_dtypes=dtypes, + ) + + def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): from concurrent.futures import TimeoutError from google.auth.exceptions import RefreshError @@ -397,21 +414,12 @@ def run_query( if config is not None: job_config.update(config) - if "query" in config and "query" in config["query"]: - if query_or_table is not None: - raise ValueError( - "Query statement can't be specified " - "inside config while it is specified " - "as parameter" - ) - query_or_table = config["query"].pop("query") - self._start_timer() try: logger.debug("Requesting query... ") query_reply = self.client.query( - query_or_table, + query, job_config=bigquery.QueryJobConfig.from_api_repr(job_config), location=self.location, project=self.project_id, @@ -471,15 +479,25 @@ def run_query( ) dtypes = kwargs.get("dtypes") + + # Ensure destination is populated. + try: + query_reply.result() + except self.http_error as ex: + self.process_http_error(ex) + + # Get the table schema, so that we can list rows. 
+ destination = self.client.get_table(query_reply.destination) + rows_iter = self.client.list_rows(destination, max_results=max_results) return self._download_results( - query_reply, + rows_iter, max_results=max_results, progress_bar_type=progress_bar_type, user_dtypes=dtypes, ) def _download_results( - self, query_job, max_results=None, progress_bar_type=None, user_dtypes=None, + self, rows_iter, max_results=None, progress_bar_type=None, user_dtypes=None, ): # No results are desired, so don't bother downloading anything. if max_results == 0: @@ -511,14 +529,6 @@ def _download_results( to_dataframe_kwargs["create_bqstorage_client"] = create_bqstorage_client try: - # TODO: This is the only difference between table ID and query job. - # But should I refactor for - # https://github.com/googleapis/python-bigquery-pandas/issues/339 - # now? - query_job.result() - # Get the table schema, so that we can list rows. - destination = self.client.get_table(query_job.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) # ENDTODO: This is the only difference between table ID and @@ -829,6 +839,15 @@ def read_gbq( if dialect not in ("legacy", "standard"): raise ValueError("'{0}' is not valid for dialect".format(dialect)) + if configuration and "query" in configuration and "query" in configuration["query"]: + if query_or_table is not None: + raise ValueError( + "Query statement can't be specified " + "inside config while it is specified " + "as parameter" + ) + query_or_table = configuration["query"].pop("query") + connector = GbqConnector( project_id, reauth=reauth, @@ -840,13 +859,21 @@ def read_gbq( use_bqstorage_api=use_bqstorage_api, ) - final_df = connector.run_query( - query_or_table, - configuration=configuration, - max_results=max_results, - progress_bar_type=progress_bar_type, - dtypes=dtypes, - ) + if _is_query(query_or_table): + final_df = connector.run_query( + query_or_table, + configuration=configuration, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) + else: + final_df = connector.download_table( + query_or_table, + max_results=max_results, + progress_bar_type=progress_bar_type, + dtypes=dtypes, + ) # Reindex the DataFrame on the provided column if index_col is not None: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 513df4b9..3f0c5e53 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -43,7 +43,7 @@ def mock_bigquery_client(monkeypatch): # Mock out SELECT 1 query results. def generate_schema(): - query = mock_client.query.call_args[0][0] + query = mock_client.query.call_args[0][0] if mock_client.query.call_args else "" if query == "SELECT 1 AS int_col": return [google.cloud.bigquery.SchemaField("int_col", "INTEGER")] else: @@ -51,4 +51,10 @@ def generate_schema(): type(mock_rows).schema = mock.PropertyMock(side_effect=generate_schema) + # Mock out get_table. 
+ def get_table(table_ref_or_id, **kwargs): + return google.cloud.bigquery.Table(table_ref_or_id) + + mock_client.get_table.side_effect = get_table + return mock_client diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 496486ef..480f0000 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -521,13 +521,14 @@ def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credenti assert to_dataframe_kwargs["progress_bar_type"] == "foobar" -def test_read_gbq_bypasses_query_with_table_id( +def test_read_gbq_with_full_table_id( mock_bigquery_client, mock_service_account_credentials ): mock_service_account_credentials.project_id = "service_account_project_id" df = gbq.read_gbq( "my-project.my_dataset.read_gbq_table", credentials=mock_service_account_credentials, + project_id="param-project", ) assert df is not None @@ -535,3 +536,37 @@ def test_read_gbq_bypasses_query_with_table_id( mock_bigquery_client.list_rows.assert_called_with( "my-project.my_dataset.read_gbq_table" ) + + +def test_read_gbq_with_partial_table_id( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + project_id="param-project", + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "param-project.my_dataset.read_gbq_table" + ) + + +def test_read_gbq_bypasses_query_with_table_id_and_max_results( + mock_bigquery_client, mock_service_account_credentials +): + mock_service_account_credentials.project_id = "service_account_project_id" + df = gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + max_results=11, + ) + assert df is not None + + mock_bigquery_client.query.assert_not_called() + mock_bigquery_client.list_rows.assert_called_with( + "my-project.my_dataset.read_gbq_table", max_results=11 + ) From dd51ad8e7d9c51301e33ec2422fe6d80013e7322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:49:49 -0600 Subject: [PATCH 06/20] fix remaining tests, don't localalize out-of-bounds timestamp columns --- pandas_gbq/gbq.py | 5 ++++- pandas_gbq/timestamp.py | 8 +++++++- tests/unit/test_gbq.py | 7 ++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6a8b6788..247df17c 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -386,7 +386,10 @@ def download_table( try: # Get the table schema, so that we can list rows. - destination = self.client.get_table(table_id) + table_ref = bigquery.TableReference.from_string( + table_id, default_project=self.project_id + ) + destination = self.client.get_table(table_ref) rows_iter = self.client.list_rows(destination, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) diff --git a/pandas_gbq/timestamp.py b/pandas_gbq/timestamp.py index e0b41475..c6bb6d93 100644 --- a/pandas_gbq/timestamp.py +++ b/pandas_gbq/timestamp.py @@ -7,6 +7,8 @@ Private module. """ +import pandas.api.types + def localize_df(df, schema_fields): """Localize any TIMESTAMP columns to tz-aware type. 
@@ -38,7 +40,11 @@ def localize_df(df, schema_fields): if "mode" in field and field["mode"].upper() == "REPEATED": continue - if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None: + if ( + field["type"].upper() == "TIMESTAMP" + and pandas.api.types.is_datetime64_ns_dtype(df.dtypes[column]) + and df[column].dt.tz is None + ): df[column] = df[column].dt.tz_localize("UTC") return df diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 480f0000..7593eea5 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,7 +535,7 @@ def test_read_gbq_with_full_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table" + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -551,7 +552,7 @@ def test_read_gbq_with_partial_table_id( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "param-project.my_dataset.read_gbq_table" + bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, ) @@ -568,5 +569,5 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( mock_bigquery_client.query.assert_not_called() mock_bigquery_client.list_rows.assert_called_with( - "my-project.my_dataset.read_gbq_table", max_results=11 + bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 ) From e1ad679671f920b5964b7b987d9ceb3b36dca10e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 15:52:12 -0600 Subject: [PATCH 07/20] Update pandas_gbq/gbq.py --- pandas_gbq/gbq.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 247df17c..fdd4dcc6 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -534,8 +534,6 @@ def _download_results( try: schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) - # ENDTODO: This is the only difference between table ID and - conversion_dtypes.update(user_dtypes) df = rows_iter.to_dataframe( dtypes=conversion_dtypes, From d29bc2ac072f0c1673944557b0dc53c12487a99a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:30:35 -0600 Subject: [PATCH 08/20] fix 3.7 unit tests --- noxfile.py | 2 +- tests/unit/test_gbq.py | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/noxfile.py b/noxfile.py index df3378bf..7530c68a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=88") + session.run("coverage", "report", "--show-missing", "--fail-under=91") session.run("coverage", "erase") diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 7593eea5..142771d1 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,7 +8,6 @@ import datetime from unittest import mock -from google.cloud import bigquery import numpy import pandas from pandas import DataFrame @@ -534,9 +533,10 @@ def test_read_gbq_with_full_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_with_partial_table_id( @@ -551,9 +551,10 @@ def test_read_gbq_with_partial_table_id( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("param-project.my_dataset.read_gbq_table"), max_results=None, - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "param-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" def test_read_gbq_bypasses_query_with_table_id_and_max_results( @@ -568,6 +569,9 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert df is not None mock_bigquery_client.query.assert_not_called() - mock_bigquery_client.list_rows.assert_called_with( - bigquery.Table("my-project.my_dataset.read_gbq_table"), max_results=11 - ) + sent_table = mock_bigquery_client.list_rows.call_args[0][0] + assert sent_table.project == "my-project" + assert sent_table.dataset_id == "my_dataset" + assert sent_table.table_id == "read_gbq_table" + sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] + assert sent_max_results == 11 From cb8f24f5153535fdff344f2b3837b10222b4e322 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:32:56 -0600 Subject: [PATCH 09/20] correct coverage --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 7530c68a..398b4dc2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -259,7 +259,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. 
""" session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=91") + session.run("coverage", "report", "--show-missing", "--fail-under=89") session.run("coverage", "erase") From 56b73b213444955b28041f1822c6ceccee93916c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:34:30 -0600 Subject: [PATCH 10/20] skip coverage for optional test skip --- tests/unit/test_gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 142771d1..0c27dd76 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -487,7 +487,7 @@ def test_read_gbq_passes_dtypes(mock_bigquery_client, mock_service_account_crede def test_read_gbq_use_bqstorage_api( mock_bigquery_client, mock_service_account_credentials ): - if not FEATURES.bigquery_has_bqstorage: + if not FEATURES.bigquery_has_bqstorage: # pragma: NO COVER pytest.skip("requires BigQuery Storage API") mock_service_account_credentials.project_id = "service_account_project_id" From 8a61e97e31d5fd5a29898554f52cb66c422f12e9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:47:34 -0600 Subject: [PATCH 11/20] fix docs build --- pandas_gbq/gbq.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index fdd4dcc6..41cb2f5b 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -712,14 +712,14 @@ def read_gbq( reauth : boolean, default False Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. - auth_local_webserver : boolean, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - https://googleapis.dev/python/google-auth-oauthlib/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + auth_local_webserver : bool, default False + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. versionadded:: 0.2.0 dialect : str, default 'standard' @@ -954,13 +954,13 @@ def to_gbq( ``'append'`` If table exists, insert data. Create if does not exist. auth_local_webserver : bool, default False - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + Use the `local webserver flow + `_ + instead of the `console flow + `_ + when getting user credentials. Your code must run on the same machine + as your web browser and your web browser can access your application + via ``localhost:808X``. .. 
versionadded:: 0.2.0 table_schema : list of dicts, optional From 3f7900bf184a10337c9bab19fa703211650da1df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 10 Dec 2021 16:56:20 -0600 Subject: [PATCH 12/20] improve test coverage for error case --- tests/unit/test_gbq.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 0c27dd76..9a0e8ce3 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -8,6 +8,7 @@ import datetime from unittest import mock +import google.api_core.exceptions import numpy import pandas from pandas import DataFrame @@ -575,3 +576,17 @@ def test_read_gbq_bypasses_query_with_table_id_and_max_results( assert sent_table.table_id == "read_gbq_table" sent_max_results = mock_bigquery_client.list_rows.call_args[1]["max_results"] assert sent_max_results == 11 + + +def test_read_gbq_with_list_rows_error_translates_exception( + mock_bigquery_client, mock_service_account_credentials +): + mock_bigquery_client.list_rows.side_effect = ( + google.api_core.exceptions.NotFound("table not found"), + ) + + with pytest.raises(gbq.GenericGBQException, match="table not found"): + gbq.read_gbq( + "my-project.my_dataset.read_gbq_table", + credentials=mock_service_account_credentials, + ) From 3c53f1f697265a9034b00fadfe99f525100f8eae Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 13 Dec 2021 10:21:46 -0600 Subject: [PATCH 13/20] as of google-cloud-bigquery 1.11.0, get_table before list_rows is unnecessary --- pandas_gbq/gbq.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 41cb2f5b..1ba64057 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -385,12 +385,10 @@ def download_table( self._start_timer() try: - # Get the table schema, so that we can list rows. table_ref = bigquery.TableReference.from_string( table_id, default_project=self.project_id ) - destination = self.client.get_table(table_ref) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows(table_ref, max_results=max_results) except self.http_error as ex: self.process_http_error(ex) @@ -489,9 +487,9 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs): except self.http_error as ex: self.process_http_error(ex) - # Get the table schema, so that we can list rows. 
- destination = self.client.get_table(query_reply.destination) - rows_iter = self.client.list_rows(destination, max_results=max_results) + rows_iter = self.client.list_rows( + query_reply.destination, max_results=max_results + ) return self._download_results( rows_iter, max_results=max_results, From 5ce125f13934b27a16889ae22aabf65d77837178 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:42:21 -0600 Subject: [PATCH 14/20] tests with whitespace --- tests/unit/test_gbq.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index 9a0e8ce3..df9241bc 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -87,8 +87,14 @@ def test__bqschema_to_nullsafe_dtypes(type_, expected): ["query_or_table", "expected"], [ ("SELECT 1", True), + ("SELECT\n1", True), + ("SELECT\t1", True), ("dataset.table", False), + (" dataset.table ", False), + ("\r\ndataset.table\r\n", False), ("project-id.dataset.table", False), + (" project-id.dataset.table ", False), + ("\r\nproject-id.dataset.table\r\n", False), ], ) def test__is_query(query_or_table, expected): From ea660f41e7717a20038119a17890795c3cfedb38 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 13:49:25 -0600 Subject: [PATCH 15/20] type annotations --- pandas_gbq/gbq.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 1ba64057..cc6bef1f 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -7,9 +7,17 @@ import time import warnings from datetime import datetime +import typing +from typing import Dict, Optional, Union import numpy as np +# Only import at module-level at type checking time to avoid circular +# dependencies in the pandas package, which has an optional dependency on +# pandas-gbq. +if typing.TYPE_CHECKING: + import pandas + # Required dependencies, but treat as optional so that _test_google_api_imports # can provide a better error message. 
try: @@ -380,8 +388,14 @@ def process_http_error(ex): raise GenericGBQException("Reason: {0}".format(ex)) def download_table( - self, table_id, max_results=None, progress_bar_type=None, dtypes=None - ): + self, + table_id: str, + max_results: Optional[int] = None, + progress_bar_type: Optional[str] = None, + dtypes: Dict[ + str, Union[str, "pandas.api.extensions.ExtensionDtype", np.dtype] + ] = None, + ) -> "pandas.DataFrame": self._start_timer() try: From 670499150b81c1598d29db451fded75b51dbb057 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 20 Dec 2021 17:26:48 -0600 Subject: [PATCH 16/20] improve coverage in owlbot config --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 5ef93de7..9849f98f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ templated_files = common.py_library( unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], - cov_level=88, + cov_level=89, unit_test_extras=extras, system_test_extras=extras, intersphinx_dependencies={ From 9a1ca1651fbfb1612d710f8d0177aa43a12ed187 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 20 Dec 2021 23:29:09 +0000 Subject: [PATCH 17/20] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index ba50bf32..88b85d03 100644 --- a/.coveragerc +++ b/.coveragerc @@ -22,7 +22,7 @@ omit = google/cloud/__init__.py [report] -fail_under = 88 +fail_under = 89 show_missing = True exclude_lines = # Re-enable the standard pragma From a9075df20e378ee507d1e28019ce5ecfb6741ada Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 21 Dec 2021 13:36:42 -0600 Subject: [PATCH 18/20] boost coverage --- pandas_gbq/load.py | 7 +++++- pandas_gbq/schema.py | 2 ++ tests/unit/test_load.py | 56 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 315ad5cd..588a6719 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -185,6 +185,11 @@ def load_csv_from_file( chunksize: Optional[int], schema: Optional[Dict[str, Any]], ): + """Manually encode a DataFrame to CSV and use the buffer in a load job. + + This method is needed for writing with google-cloud-bigquery versions that + don't implment load_table_from_dataframe with the CSV serialization format. + """ if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) @@ -203,7 +208,7 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk,) + return load_csv(dataframe, chunksize, bq_schema, load_chunk) def load_chunks( diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index e2f97455..118e00f0 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -101,6 +101,8 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. 
} fields = [] diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 8e18cfb9..3f32bff9 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -95,6 +95,62 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 +def test_load_csv_from_file_generates_schema(mock_bigquery_client): + import google.cloud.bigquery + + df = pandas.DataFrame( + { + "int_col": [1, 2, 3], + "bool_col": [True, False, True], + "float_col": [0.0, 1.25, -2.75], + "string_col": ["a", "b", "c"], + "datetime_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ), + "timestamp_col": pandas.Series( + [ + "2021-12-21 13:28:40.123789", + "2000-01-01 11:10:09", + "2040-10-31 23:59:59.999999", + ], + dtype="datetime64[ns]", + ).dt.tz_localize(datetime.timezone.utc), + } + ) + destination = google.cloud.bigquery.TableReference.from_string( + "my-project.my_dataset.my_table" + ) + + _ = list( + load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) + ) + + mock_load = mock_bigquery_client.load_table_from_file + assert mock_load.called + _, kwargs = mock_load.call_args + assert "job_config" in kwargs + sent_schema = kwargs["job_config"].schema + assert sent_schema[0].name == "int_col" + assert sent_schema[0].field_type == "INTEGER" + assert sent_schema[1].name == "bool_col" + assert sent_schema[1].field_type == "BOOLEAN" + assert sent_schema[2].name == "float_col" + assert sent_schema[2].field_type == "FLOAT" + assert sent_schema[3].name == "string_col" + assert sent_schema[3].field_type == "STRING" + # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is + # localized. + assert sent_schema[4].name == "datetime_col" + assert sent_schema[4].field_type == "TIMESTAMP" + assert sent_schema[5].name == "timestamp_col" + assert sent_schema[5].field_type == "TIMESTAMP" + + @pytest.mark.parametrize( ["bigquery_has_from_dataframe_with_csv", "api_method"], [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], From ed3f9d9e624be918587927738c961191e46e383c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 11:15:31 -0600 Subject: [PATCH 19/20] Revert "boost coverage" This reverts commit a9075df20e378ee507d1e28019ce5ecfb6741ada. --- pandas_gbq/load.py | 7 +----- pandas_gbq/schema.py | 2 -- tests/unit/test_load.py | 56 ----------------------------------------- 3 files changed, 1 insertion(+), 64 deletions(-) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index 588a6719..315ad5cd 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -185,11 +185,6 @@ def load_csv_from_file( chunksize: Optional[int], schema: Optional[Dict[str, Any]], ): - """Manually encode a DataFrame to CSV and use the buffer in a load job. - - This method is needed for writing with google-cloud-bigquery versions that - don't implment load_table_from_dataframe with the CSV serialization format. 
- """ if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) @@ -208,7 +203,7 @@ def load_chunk(chunk, job_config): finally: chunk_buffer.close() - return load_csv(dataframe, chunksize, bq_schema, load_chunk) + return load_csv(dataframe, chunksize, bq_schema, load_chunk,) def load_chunks( diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index 118e00f0..e2f97455 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -101,8 +101,6 @@ def generate_bq_schema(dataframe, default_type="STRING"): "S": "STRING", "U": "STRING", "M": "TIMESTAMP", - # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. } fields = [] diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 3f32bff9..8e18cfb9 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -95,62 +95,6 @@ def test_encode_chunks_with_chunksize_none(): assert len(chunk.index) == 6 -def test_load_csv_from_file_generates_schema(mock_bigquery_client): - import google.cloud.bigquery - - df = pandas.DataFrame( - { - "int_col": [1, 2, 3], - "bool_col": [True, False, True], - "float_col": [0.0, 1.25, -2.75], - "string_col": ["a", "b", "c"], - "datetime_col": pandas.Series( - [ - "2021-12-21 13:28:40.123789", - "2000-01-01 11:10:09", - "2040-10-31 23:59:59.999999", - ], - dtype="datetime64[ns]", - ), - "timestamp_col": pandas.Series( - [ - "2021-12-21 13:28:40.123789", - "2000-01-01 11:10:09", - "2040-10-31 23:59:59.999999", - ], - dtype="datetime64[ns]", - ).dt.tz_localize(datetime.timezone.utc), - } - ) - destination = google.cloud.bigquery.TableReference.from_string( - "my-project.my_dataset.my_table" - ) - - _ = list( - load.load_csv_from_file(mock_bigquery_client, df, destination, None, None, None) - ) - - mock_load = mock_bigquery_client.load_table_from_file - assert mock_load.called - _, kwargs = mock_load.call_args - assert "job_config" in kwargs - sent_schema = kwargs["job_config"].schema - assert sent_schema[0].name == "int_col" - assert sent_schema[0].field_type == "INTEGER" - assert sent_schema[1].name == "bool_col" - assert sent_schema[1].field_type == "BOOLEAN" - assert sent_schema[2].name == "float_col" - assert sent_schema[2].field_type == "FLOAT" - assert sent_schema[3].name == "string_col" - assert sent_schema[3].field_type == "STRING" - # TODO: Disambiguate TIMESTAMP from DATETIME based on if column is - # localized. - assert sent_schema[4].name == "datetime_col" - assert sent_schema[4].field_type == "TIMESTAMP" - assert sent_schema[5].name == "timestamp_col" - assert sent_schema[5].field_type == "TIMESTAMP" - - @pytest.mark.parametrize( ["bigquery_has_from_dataframe_with_csv", "api_method"], [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], From b9b017cc85cf8f5b28666f6918f6efb7a8a25fc7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Dec 2021 12:32:07 -0600 Subject: [PATCH 20/20] don't cover type checking only code, more generic type annotation --- pandas_gbq/gbq.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index cc6bef1f..5dcc3fd0 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -8,14 +8,14 @@ import warnings from datetime import datetime import typing -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union import numpy as np # Only import at module-level at type checking time to avoid circular # dependencies in the pandas package, which has an optional dependency on # pandas-gbq. 
-if typing.TYPE_CHECKING: +if typing.TYPE_CHECKING: # pragma: NO COVER import pandas # Required dependencies, but treat as optional so that _test_google_api_imports @@ -392,9 +392,7 @@ def download_table( table_id: str, max_results: Optional[int] = None, progress_bar_type: Optional[str] = None, - dtypes: Dict[ - str, Union[str, "pandas.api.extensions.ExtensionDtype", np.dtype] - ] = None, + dtypes: Optional[Dict[str, Union[str, Any]]] = None, ) -> "pandas.DataFrame": self._start_timer()
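
---

Taken together, this series makes read_gbq dispatch on its first argument: a string
containing whitespace is treated as a SQL query and routed through
GbqConnector.run_query, while a whitespace-free string is treated as a table ID and
routed through the new GbqConnector.download_table, which lists rows directly instead
of starting a query job. The sketch below is a minimal illustration of the resulting
behavior, assuming the series is applied on top of pandas-gbq with
google-cloud-bigquery >= 1.11.0 and that credentials are available from the
environment; the project, dataset, and table names are placeholders.

    import pandas_gbq

    # Whitespace marks the string as SQL (_is_query matches r"\s" after
    # strip), so this path starts a query job via GbqConnector.run_query.
    df_from_query = pandas_gbq.read_gbq(
        "SELECT 1 AS int_col",
        project_id="my-project",  # optional when inferable from the environment
    )

    # No whitespace, so the string is treated as a table ID and handed to
    # GbqConnector.download_table, which calls client.list_rows directly;
    # no query job is created.
    df_from_table = pandas_gbq.read_gbq(
        "my-project.my_dataset.my_table",
        max_results=10,  # forwarded to list_rows
    )

    # A partial table ID is resolved against project_id (or the default
    # project) via bigquery.TableReference.from_string.
    df_partial = pandas_gbq.read_gbq("my_dataset.my_table", project_id="my-project")

One design consequence worth noting: because the table-ID path skips the query job,
query-only options such as configuration and dialect have no effect on it, while
max_results, progress_bar_type, and dtypes are honored on both paths through the
shared _download_results helper.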