From 0730f6ac7f261d0e0885e4c94b78df76f4ac974e Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 15:33:23 -0400
Subject: [PATCH 01/14] parse all datetime types

---
 pandas_gbq/gbq.py        |  8 +++++++-
 tests/system/test_gbq.py | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 79cd1aba..d9f208e8 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -577,7 +577,13 @@ def _parse_schema(schema_fields):
     # see:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
-    dtype_map = {"FLOAT": np.dtype(float), "TIMESTAMP": "M8[ns]"}
+    dtype_map = {
+        "FLOAT": np.dtype(float),
+        "TIMESTAMP": "datetime64[ns]",
+        "TIME": "datetime64[ns]",
+        "DATE": "datetime64[ns]",
+        "DATETIME": "datetime64[ns]",
+    }
 
     for field in schema_fields:
         name = str(field["name"])
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index ba85b4c2..ced52fd0 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 
 import sys
-from datetime import datetime
 import uuid
+from datetime import datetime
 
 import numpy as np
 import pandas.util.testing as tm
@@ -338,6 +338,23 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             ),
         )
 
+    @pytest.mark.parametrize(
+        "date_type", ["DATE", "DATETIME", "TIMESTAMP", "TIME"]
+    )
+    def test_should_properly_handle_all_timestamp_types(
+        self, project_id, date_type
+    ):
+        query = 'SELECT {typ}("2004-09-15") AS valid_timestamp'.format(
+            date_type
+        )
+        df = gbq.read_gbq(
+            query,
+            project_id=project_id,
+            private_key=self.credentials,
+            dialect="legacy",
+        )
+        assert df["valid_timestamp"].dtype == "<M8[ns]"
+

From: Maximilian Roos
Date: Fri, 28 Sep 2018 15:42:07 -0400
Subject: [PATCH 02/14] typo

---
 tests/system/test_gbq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index ced52fd0..37de8c19 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -345,7 +345,7 @@ def test_should_properly_handle_all_timestamp_types(
         self, project_id, date_type
     ):
         query = 'SELECT {typ}("2004-09-15") AS valid_timestamp'.format(
-            date_type
+            typ=date_type
         )
         df = gbq.read_gbq(
             query,
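A note on PATCH 01: every BigQuery date/time flavour now maps to numpy's nanosecond datetime64, and the "<M8[ns]" string in the test is simply the little-endian spelling of that same dtype. A minimal standalone sketch of the mapping (the schema_fields list is made up for illustration; this is not the library code verbatim):

    import numpy as np

    # The dtype map from PATCH 01. "datetime64[ns]" and "<M8[ns]" name
    # the same numpy dtype, which is what the test's assert relies on.
    dtype_map = {
        "FLOAT": np.dtype(float),
        "TIMESTAMP": "datetime64[ns]",
        "TIME": "datetime64[ns]",
        "DATE": "datetime64[ns]",
        "DATETIME": "datetime64[ns]",
    }
    assert np.dtype("datetime64[ns]") == np.dtype("<M8[ns]")

    # Hypothetical fields in the shape a BigQuery schema takes.
    schema_fields = [
        {"name": "ts", "type": "TIMESTAMP", "mode": "NULLABLE"},
        {"name": "day", "type": "DATE", "mode": "NULLABLE"},
    ]
    for field in schema_fields:
        print(field["name"], dtype_map.get(field["type"].upper(), object))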
From a98f998aca09e771714cb8ae21b545f7bb3f862f Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 15:50:12 -0400
Subject: [PATCH 03/14] new black version

---
 pandas_gbq/gbq.py         |  2 +-
 tests/system/test_gbq.py  | 12 ++++++------
 tests/unit/test_schema.py |  1 -
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index d9f208e8..b39489e0 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -283,7 +283,7 @@ def __init__(
 
         # BQ Queries costs $5 per TB. First 1 TB per month is free
         # see here for more: https://cloud.google.com/bigquery/pricing
-        self.query_price_for_TB = 5. / 2 ** 40  # USD/TB
+        self.query_price_for_TB = 5.0 / 2 ** 40  # USD/TB
 
     def _start_timer(self):
         self.start = time.time()
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 37de8c19..cf299ce1 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -758,12 +758,12 @@ def test_query_response_bytes(self):
         assert self.gbq_connector.sizeof_fmt(1048576) == "1.0 MB"
         assert self.gbq_connector.sizeof_fmt(1048576000) == "1000.0 MB"
         assert self.gbq_connector.sizeof_fmt(1073741824) == "1.0 GB"
-        assert self.gbq_connector.sizeof_fmt(1.099512E12) == "1.0 TB"
-        assert self.gbq_connector.sizeof_fmt(1.125900E15) == "1.0 PB"
-        assert self.gbq_connector.sizeof_fmt(1.152922E18) == "1.0 EB"
-        assert self.gbq_connector.sizeof_fmt(1.180592E21) == "1.0 ZB"
-        assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB"
-        assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB"
+        assert self.gbq_connector.sizeof_fmt(1.099512e12) == "1.0 TB"
+        assert self.gbq_connector.sizeof_fmt(1.125900e15) == "1.0 PB"
+        assert self.gbq_connector.sizeof_fmt(1.152922e18) == "1.0 EB"
+        assert self.gbq_connector.sizeof_fmt(1.180592e21) == "1.0 ZB"
+        assert self.gbq_connector.sizeof_fmt(1.208926e24) == "1.0 YB"
+        assert self.gbq_connector.sizeof_fmt(1.208926e28) == "10000.0 YB"
 
     def test_struct(self, project_id):
         query = """SELECT 1 int_field,
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 66aca1dc..74f22f29 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -1,4 +1,3 @@
-
 import datetime
 
 import pandas

From 28430618aef552509106a6497962d162b35917a5 Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 18:21:51 -0400
Subject: [PATCH 04/14] I think we're doing similar things twice

---
 pandas_gbq/gbq.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index b39489e0..83af008c 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -2,7 +2,6 @@
 import os
 import time
 import warnings
-from collections import OrderedDict
 from datetime import datetime
 
 import numpy as np
@@ -583,6 +582,8 @@ def _parse_schema(schema_fields):
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
+        "BOOLEAN": bool,
+        "INTEGER": np.int64,
     }
 
     for field in schema_fields:
@@ -590,17 +591,19 @@ def _parse_schema(schema_fields):
         if field["mode"].upper() == "REPEATED":
             yield name, object
         else:
-            dtype = dtype_map.get(field["type"].upper(), object)
+            dtype = dtype_map.get(field["type"].upper())
             yield name, dtype
 
 
 def _parse_data(schema, rows):
 
-    column_dtypes = OrderedDict(_parse_schema(schema["fields"]))
+    column_dtypes = dict(_parse_schema(schema["fields"]))
 
     df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
 
     for column in df:
-        df[column] = df[column].astype(column_dtypes[column])
+        dtype = column_dtypes[column]
+        if dtype:
+            df[column] = df[column].astype(column_dtypes[column])
 
     return df
@@ -755,16 +758,16 @@ def read_gbq(
 
     # cast BOOLEAN and INTEGER columns from object to bool/int
     # if they dont have any nulls AND field mode is not repeated (i.e., array)
-    type_map = {"BOOLEAN": bool, "INTEGER": np.int64}
-    for field in schema["fields"]:
-        if (
-            field["type"].upper() in type_map
-            and final_df[field["name"]].notnull().all()
-            and field["mode"].lower() != "repeated"
-        ):
-            final_df[field["name"]] = final_df[field["name"]].astype(
-                type_map[field["type"].upper()]
-            )
+    # type_map = {"BOOLEAN": bool, "INTEGER": np.int64}
+    # for field in schema["fields"]:
+    #     if (
+    #         field["type"].upper() in type_map
+    #         and final_df[field["name"]].notnull().all()
+    #         and field["mode"].lower() != "repeated"
+    #     ):
+    #         final_df[field["name"]] = final_df[field["name"]].astype(
+    #             type_map[field["type"].upper()]
+    #         )
 
     connector.log_elapsed_seconds(
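PATCH 04 is the pivot of the series: dtype decisions move into _parse_schema, so _parse_data casts every column in a single pass and read_gbq's separate BOOLEAN/INTEGER fix-up becomes redundant. A rough standalone sketch of the consolidated flow (hypothetical schema and rows; the REPEATED-mode branch is omitted for brevity):

    import numpy as np
    from pandas import DataFrame

    def parse_schema(schema_fields):
        # One pass decides each column's target dtype; None means
        # "no safe cast known -- leave the column alone".
        dtype_map = {
            "FLOAT": np.dtype(float),
            "TIMESTAMP": "datetime64[ns]",
            "BOOLEAN": bool,
            "INTEGER": np.int64,
        }
        for field in schema_fields:
            yield str(field["name"]), dtype_map.get(field["type"].upper())

    # Stand-ins for a BigQuery response.
    schema = {"fields": [{"name": "n", "type": "INTEGER"},
                         {"name": "s", "type": "STRING"}]}
    rows = [[1, "a"], [2, "b"]]

    column_dtypes = dict(parse_schema(schema["fields"]))
    df = DataFrame(rows, columns=column_dtypes.keys())
    for column in df:
        dtype = column_dtypes[column]
        if dtype:  # STRING maps to None, so it stays object
            df[column] = df[column].astype(dtype)
    print(df.dtypes)  # n -> int64, s -> object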
From 541e96f63fb1a0fcfb20620c28b858d85481a90f Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 18:22:06 -0400
Subject: [PATCH 05/14] better type tests

---
 tests/system/test_gbq.py | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index cf299ce1..42b66989 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -339,21 +339,30 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
 
     @pytest.mark.parametrize(
-        "date_type", ["DATE", "DATETIME", "TIMESTAMP", "TIME"]
+        "expression, type_",
+        [
+            ("current_date()", "<M8[ns]"),

From: Maximilian Roos
Date: Fri, 28 Sep 2018 18:31:51 -0400
Subject: [PATCH 06/14] check nulls before assigning type

---
 pandas_gbq/gbq.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 83af008c..e4fb878d 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -603,7 +603,9 @@ def _parse_data(schema, rows):
 
     for column in df:
         dtype = column_dtypes[column]
         if dtype:
-            df[column] = df[column].astype(column_dtypes[column])
+            df[column] = df[column].astype(
+                column_dtypes[column], errors="ignore"
+            )
 
     return df

From 9cbeb1f99993b904d01a6138b6e4ba2841c8cfb2 Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 18:33:39 -0400
Subject: [PATCH 07/14] add env to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f0dd6fbd..9fb09906 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@
 .pytest_cache
 .testmon*
 .vscode/
+.env
 
 # Docs #
 ########

From dd276be316b7a20b59e0e69fc84438e33e9850e2 Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 18:34:11 -0400
Subject: [PATCH 08/14] remove old code

---
 pandas_gbq/gbq.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index e4fb878d..5dc8bc86 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -758,19 +758,6 @@ def read_gbq(
                 "Column order does not match this DataFrame."
             )
 
-    # cast BOOLEAN and INTEGER columns from object to bool/int
-    # if they dont have any nulls AND field mode is not repeated (i.e., array)
-    # type_map = {"BOOLEAN": bool, "INTEGER": np.int64}
-    # for field in schema["fields"]:
-    #     if (
-    #         field["type"].upper() in type_map
-    #         and final_df[field["name"]].notnull().all()
-    #         and field["mode"].lower() != "repeated"
-    #     ):
-    #         final_df[field["name"]] = final_df[field["name"]].astype(
-    #             type_map[field["type"].upper()]
-    #         )
-
     connector.log_elapsed_seconds(
         "Total time taken",
         datetime.now().strftime("s.\nFinished at %Y-%m-%d %H:%M:%S."),
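Why PATCH 06 passes errors="ignore": casting an object column that still contains None to a strict dtype raises, and errors="ignore" makes astype hand the column back unchanged instead of failing the whole read. A small sketch of the behaviour (with the pandas of the time; newer pandas deprecates errors="ignore"):

    import pandas as pd

    s = pd.Series([1, None], dtype=object)

    # A strict cast fails: None has no int64 representation.
    try:
        s.astype("int64")
    except (TypeError, ValueError) as err:
        print("raised:", err)

    # With errors="ignore" the original column comes back untouched, so
    # a null-bearing INTEGER column simply stays object dtype.
    print(s.astype("int64", errors="ignore").dtype)  # object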
From a933d7dc9bcbd6995d0bc91cb6bc057653f7e68b Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 19:21:52 -0400
Subject: [PATCH 09/14] handle float and int columns re nulls

---
 pandas_gbq/gbq.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 5dc8bc86..8e96a935 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -598,11 +598,16 @@ def _parse_data(schema, rows):
     column_dtypes = dict(_parse_schema(schema["fields"]))
-
     df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
+
     for column in df:
         dtype = column_dtypes[column]
-        if dtype:
+        null_safe = (
+            df[column].notnull().all()
+            or dtype == float
+            or dtype == "datetime64[ns]"
+        )
+        if dtype and null_safe:
             df[column] = df[column].astype(
                 column_dtypes[column], errors="ignore"
             )
 
     return df

From c4c8cac7227917303927473bf2b31b66e8e4ac9e Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 19:22:39 -0400
Subject: [PATCH 10/14] nullable int columns as floats (separate issue)

---
 tests/system/test_gbq.py | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 42b66989..96a0eec8 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -200,9 +200,7 @@ def test_should_properly_handle_nullable_integers(self, project_id):
             private_key=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df, DataFrame({"nullable_integer": [1, None]}).astype(object)
-        )
+        tm.assert_frame_equal(df, DataFrame({"nullable_integer": [1, None]}))
 
     def test_should_properly_handle_valid_longs(self, project_id):
         query = "SELECT 1 << 62 AS valid_long"
@@ -225,7 +223,7 @@ def test_should_properly_handle_nullable_longs(self, project_id):
             dialect="legacy",
         )
         tm.assert_frame_equal(
-            df, DataFrame({"nullable_long": [1 << 62, None]}).astype(object)
+            df, DataFrame({"nullable_long": [1 << 62, None]})
         )
 
     def test_should_properly_handle_null_integers(self, project_id):
@@ -344,6 +342,8 @@
             ("current_date()", "<M8[ns]"),

From: Maximilian Roos
Date: Fri, 28 Sep 2018 19:36:41 -0400
Subject: [PATCH 11/14] Chesterton's Fence

---
 pandas_gbq/gbq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 8e96a935..01cf55c1 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -2,6 +2,7 @@
 import os
 import time
 import warnings
+from collections import OrderedDict
 from datetime import datetime
 
 import numpy as np
@@ -597,7 +598,7 @@ def _parse_schema(schema_fields):
 
 
 def _parse_data(schema, rows):
-    column_dtypes = dict(_parse_schema(schema["fields"]))
+    column_dtypes = OrderedDict(_parse_schema(schema["fields"]))
     df = DataFrame(data=(iter(r) for r in rows), columns=column_dtypes.keys())
 
     for column in df:
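The null_safe test in PATCH 09 encodes a pandas fact: float and datetime64[ns] have native missing-value markers (NaN and NaT), so those casts can never lose a NULL, while int64 has none — which is why patches 09-10 let a nullable INTEGER column surface as float rather than object. A small illustration:

    import pandas as pd

    # None becomes NaN under float inference...
    print(pd.Series([1, None]).dtype)  # float64

    # ...and NaT under datetime conversion.
    print(pd.to_datetime(pd.Series(["2004-09-15", None])))

    # int64 has no missing-value slot, so an integer cast is only safe
    # when notnull().all() holds.
    print(pd.Series([1, 2]).astype("int64").dtype)  # int64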
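And the Chesterton's Fence of PATCH 11: column_dtypes.keys() supplies the DataFrame's column order, and on the Pythons supported at the time a plain dict only preserves insertion order as a CPython 3.6 implementation detail (a language guarantee only from 3.7), whereas OrderedDict guarantees it everywhere. A one-liner of the invariant being protected:

    from collections import OrderedDict

    # Schema order must survive: these keys become the DataFrame columns.
    column_dtypes = OrderedDict(
        [("ts", "datetime64[ns]"), ("n", "int64"), ("x", "float64")]
    )
    assert list(column_dtypes) == ["ts", "n", "x"]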
From f05f006b813cc478b374814dbdb7ff34365f1a94 Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Fri, 28 Sep 2018 23:21:27 -0400
Subject: [PATCH 12/14] try falling back to standard black check

---
 noxfile.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/noxfile.py b/noxfile.py
index 7a76559e..104a34c4 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -77,13 +77,7 @@ def test_latest_deps(session, python=latest_python):
 @nox.session
 def lint(session, python=latest_python):
     session.install("black")
-    session.run(
-        "black",
-        "--check",
-        "--exclude",
-        "(\.git|\.hg|\.mypy_cache|\.tox|\.nox|\.venv|_build|buck-out|build|dist)",
-        ".",
-    )
+    session.run("black", "--check", ".")
 
 
 @nox.session

From 418084abf52cbd59b8fa0e85d8eb4b9a938e404b Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Sat, 29 Sep 2018 00:30:41 -0400
Subject: [PATCH 13/14] exclude nox

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 90440f59..318a0442 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,4 +4,5 @@ exclude = '''
 versioneer.py
 | _version.py
 | docs
+| .nox
 '''
\ No newline at end of file

From d1ca352eca9135f686b9925892e48b2ab4b9fa41 Mon Sep 17 00:00:00 2001
From: Maximilian Roos
Date: Wed, 10 Oct 2018 13:00:50 -0400
Subject: [PATCH 14/14] changelog

---
 docs/source/changelog.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 28cecbca..68dc8d60 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -6,6 +6,10 @@ Changelog
 0.7.0 / [unreleased]
 --------------------
 
+- `int` columns which contain `NULL` are now cast to `float`, rather than
+  `object` type. (:issue:`174`)
+- `DATE`, `DATETIME` and `TIMESTAMP` columns are now parsed as pandas'
+  `timestamp` objects (:issue:`224`)
 - Add :class:`pandas_gbq.Context` to cache credentials in-memory, across
   calls to ``read_gbq`` and ``to_gbq``. (:issue:`198`, :issue:`208`)
 - Fast queries now do not log above ``DEBUG`` level. (:issue:`204`)
@@ -20,6 +24,8 @@ Internal changes
 ~~~~~~~~~~~~~~~~
 
 - Avoid listing datasets and tables in system tests. (:issue:`215`)
+- Improved performance from eliminating some duplicative parsing steps
+  (:issue:`224`)
 
 .. _changelog-0.6.1:
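To close, the user-visible upshot the changelog describes, sketched as a hypothetical call (placeholder project id; the dtypes follow the system tests above, this is not a verbatim transcript):

    import pandas_gbq

    df = pandas_gbq.read_gbq(
        "SELECT current_date() AS day",
        project_id="my-project",  # placeholder
        dialect="standard",
    )
    # day -> datetime64[ns]: DATE is now parsed, not left as object.
    # And per test_should_properly_handle_nullable_integers above, an
    # INTEGER column holding [1, NULL] now surfaces as float64 (NaN for
    # NULL) rather than object.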