feat: add support for INTERVAL data type to list_rows (#840)

tswast · gcf-owl-bot[bot] · plamut · web-flow · commit e37380a959cb · 2021-10-26T11:04:13.000-05:00
* test: refactor `list_rows` tests and add test for scalars
* WIP: INTERVAL support
* feat: add support for INTERVAL data type to `list_rows`
* fix relativedelta construction for non-microseconds
* WIP: support INTERVAL query params
* remove dead code
* INTERVAL not supported in query parameters
* revert query parameter changes
* add validation error for interval
* add unit tests for extreme intervals
* add dateutil to intersphinx
* use dictionary for intersphinx
* 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* add test case for trailing .
* explicit none
* 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* truncate nanoseconds
* use \d group for digits
* use \d for consistency

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Peter Lamut <plamut@users.noreply.github.com>
diff --git a/docs/conf.py b/docs/conf.py
@@ -366,8 +366,9 @@
     "grpc": ("https://grpc.github.io/grpc/python/", None),
     "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
     "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
-    "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
+    "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
     "geopandas": ("https://geopandas.org/", None),
+    "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None),
 }
 
 
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
@@ -19,8 +19,9 @@
 import decimal
 import math
 import re
-from typing import Any, Union
+from typing import Any, Optional, Union
 
+from dateutil import relativedelta
 from google.cloud._helpers import UTC
 from google.cloud._helpers import _date_from_iso8601_date
 from google.cloud._helpers import _datetime_from_microseconds
@@ -45,6 +46,14 @@
     re.VERBOSE,
 )
 
+# BigQuery sends INTERVAL data in "canonical format"
+# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
+#
+# The pattern has three sections separated by single spaces:
+# "<years>-<months> <days> <hours>:<minutes>:<seconds>[.<fraction>]".
+_INTERVAL_PATTERN = re.compile(
+    # Years and months share one optional leading "-", captured on its own
+    # so it can be applied to both components.
+    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
+    # The days group includes its own optional "-" together with the digits.
+    r"(?P<days>-?\d+) "
+    # Hours, minutes and seconds share one optional leading "-". The "." and
+    # the fractional digits are both optional, and any number of fractional
+    # digits is accepted.
+    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
+)
+
 _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
 _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
 _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
@@ -191,6 +200,41 @@ def _int_from_json(value, field):
         return int(value)
 
 
+def _interval_from_json(
+    value: Optional[str], field
+) -> Optional[relativedelta.relativedelta]:
+    """Coerce 'value' to an interval, if set or not nullable.
+
+    Args:
+        value: Interval text to parse, or ``None``. Non-``None`` text is
+            matched against ``_INTERVAL_PATTERN``.
+        field: The field corresponding to the value. It is passed to
+            ``_not_null`` to decide whether a missing value is acceptable.
+
+    Returns:
+        A ``relativedelta`` built from the parsed years, months, days, hours,
+        minutes, seconds and microseconds, or ``None`` when ``_not_null``
+        reports the value as null.
+
+    Raises:
+        TypeError: If ``value`` is ``None`` but ``_not_null`` did not report
+            it as null (the message refers to a REQUIRED field).
+        ValueError: If ``value`` does not match ``_INTERVAL_PATTERN``.
+    """
+    if not _not_null(value, field):
+        return None
+    # Reaching this point with None means the null check above did not accept
+    # a missing value for this field.
+    if value is None:
+        raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")
+
+    parsed = _INTERVAL_PATTERN.match(value)
+    if parsed is None:
+        raise ValueError(f"got interval: '{value}' with unexpected format")
+
+    # One sign is captured for the "years-months" pair and applied to both.
+    calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1
+    years = calendar_sign * int(parsed.group("years"))
+    months = calendar_sign * int(parsed.group("months"))
+    # The days group already contains its own optional "-".
+    days = int(parsed.group("days"))
+    # One sign is captured for the time section and applied to hours, minutes,
+    # seconds and the fractional part alike.
+    time_sign = -1 if parsed.group("time_sign") == "-" else 1
+    hours = time_sign * int(parsed.group("hours"))
+    minutes = time_sign * int(parsed.group("minutes"))
+    seconds = time_sign * int(parsed.group("seconds"))
+    fraction = parsed.group("fraction")
+    # Right-pad the fractional digits with zeros to six places and drop any
+    # digits beyond the sixth, so the result is a whole number of
+    # microseconds. Extra digits are truncated, not rounded, and no float
+    # conversion is involved. An empty fraction yields 0.
+    microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0
+
+    return relativedelta.relativedelta(
+        years=years,
+        months=months,
+        days=days,
+        hours=hours,
+        minutes=minutes,
+        seconds=seconds,
+        microseconds=microseconds,
+    )
+
+
 def _float_from_json(value, field):
     """Coerce 'value' to a float, if set or not nullable."""
     if _not_null(value, field):
@@ -327,6 +371,7 @@ def _record_from_json(value, field):
 _CELLDATA_FROM_JSON = {
     "INTEGER": _int_from_json,
     "INT64": _int_from_json,
+    "INTERVAL": _interval_from_json,
     "FLOAT": _float_from_json,
     "FLOAT64": _float_from_json,
     "NUMERIC": _decimal_from_json,
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
@@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum):
     DATE = "DATE"
     TIME = "TIME"
     DATETIME = "DATETIME"
+    INTERVAL = "INTERVAL"  # NOTE: not available in legacy types
 
 
 class SqlParameterScalarTypes:
diff --git a/owlbot.py b/owlbot.py
@@ -98,8 +98,9 @@
     microgenerator=True,
     split_system_tests=True,
     intersphinx_dependencies={
-        "pandas": "http://pandas.pydata.org/pandas-docs/stable/",
+        "dateutil": "https://dateutil.readthedocs.io/en/latest/",
         "geopandas": "https://geopandas.org/",
+        "pandas": "https://pandas.pydata.org/pandas-docs/dev",
     },
 )
 
@@ -115,10 +116,6 @@
         # Include custom SNIPPETS_TESTS job for performance.
         # https://github.com/googleapis/python-bigquery/issues/191
         ".kokoro/presubmit/presubmit.cfg",
-        # Group all renovate PRs together. If this works well, remove this and
-        # update the shared templates (possibly with configuration option to
-        # py_library.)
-        "renovate.json",
     ],
 )
 
diff --git a/renovate.json b/renovate.json
@@ -1,6 +1,9 @@
 {
   "extends": [
-    "config:base",  "group:all", ":preserveSemverRanges"
+    "config:base",
+    "group:all",
+    ":preserveSemverRanges",
+    ":disableDependencyDashboard"
   ],
   "ignorePaths": [".pre-commit-config.yaml"],
   "pip_requirements": {
diff --git a/setup.py b/setup.py
@@ -42,6 +42,7 @@
     "google-resumable-media >= 0.6.0, < 3.0dev",
     "packaging >= 14.3",
     "protobuf >= 3.12.0",
+    "python-dateutil >= 2.7.2, <3.0dev",
     "requests >= 2.18.0, < 3.0.0dev",
 ]
 extras = {
diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt
@@ -18,6 +18,7 @@ pandas==0.24.2
 proto-plus==1.10.0
 protobuf==3.12.0
 pyarrow==3.0.0
+python-dateutil==2.7.2
 requests==2.18.0
 Shapely==1.6.0
 six==1.13.0
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
@@ -37,11 +37,6 @@
 except ImportError:  # pragma: NO COVER
     bigquery_storage = None
 
-try:
-    import fastavro  # to parse BQ storage client results
-except ImportError:  # pragma: NO COVER
-    fastavro = None
-
 try:
     import pyarrow
     import pyarrow.types
diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py
@@ -15,6 +15,8 @@
 import datetime
 import decimal
 
+from dateutil import relativedelta
+
 from google.cloud import bigquery
 from google.cloud.bigquery import enums
 
@@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str)
     assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
     assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
     assert row["int64_col"] == 123456789
+    assert row["interval_col"] == relativedelta.relativedelta(
+        years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456
+    )
     assert row["numeric_col"] == decimal.Decimal("1.23456789")
     assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
     assert row["float64_col"] == 1.25
@@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme(
     assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
     assert row["geography_col"] == "POINT(-135 90)"
     assert row["int64_col"] == 9223372036854775807
+    assert row["interval_col"] == relativedelta.relativedelta(
+        years=-10000, days=-3660000, hours=-87840000
+    )
     assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
     assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
     assert row["float64_col"] == float("Inf")
diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py
@@ -0,0 +1,157 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dateutil.relativedelta import relativedelta
+import pytest
+
+from google.cloud.bigquery.schema import SchemaField
+
+
+def create_field(mode="NULLABLE", type_="IGNORED"):
+    """Build a ``SchemaField`` named "test_field" with the given mode and type."""
+    return SchemaField("test_field", type_, mode=mode)
+
+
+@pytest.fixture
+def mut():
+    """Provide the ``google.cloud.bigquery._helpers`` module to the tests."""
+    # Imported inside the fixture rather than at module import time.
+    from google.cloud.bigquery import _helpers
+
+    return _helpers
+
+
+def test_interval_from_json_w_none_nullable(mut):
+    """``None`` for a NULLABLE field is returned as ``None``."""
+    got = mut._interval_from_json(None, create_field())
+    assert got is None
+
+
+def test_interval_from_json_w_none_required(mut):
+    """``None`` for a REQUIRED field raises ``TypeError``."""
+    with pytest.raises(TypeError):
+        mut._interval_from_json(None, create_field(mode="REQUIRED"))
+
+
+def test_interval_from_json_w_invalid_format(mut):
+    """Unparseable text raises ``ValueError`` whose message includes the input."""
+    with pytest.raises(ValueError, match="NOT_AN_INTERVAL"):
+        mut._interval_from_json("NOT_AN_INTERVAL", create_field())
+
+
+@pytest.mark.parametrize(
+    ("value", "expected"),
+    (
+        ("0-0 0 0:0:0", relativedelta()),
+        # SELECT INTERVAL X YEAR
+        ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
+        ("-1-0 0 0:0:0", relativedelta(years=-1)),
+        ("1-0 0 0:0:0", relativedelta(years=1)),
+        ("10000-0 0 0:0:0", relativedelta(years=10000)),
+        # SELECT INTERVAL X MONTH
+        ("-0-11 0 0:0:0", relativedelta(months=-11)),
+        ("-0-1 0 0:0:0", relativedelta(months=-1)),
+        ("0-1 0 0:0:0", relativedelta(months=1)),
+        ("0-11 0 0:0:0", relativedelta(months=11)),
+        # SELECT INTERVAL X DAY
+        ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
+        ("0-0 -1 0:0:0", relativedelta(days=-1)),
+        ("0-0 1 0:0:0", relativedelta(days=1)),
+        ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
+        # SELECT INTERVAL X HOUR
+        ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
+        ("0-0 0 -1:0:0", relativedelta(hours=-1)),
+        ("0-0 0 1:0:0", relativedelta(hours=1)),
+        ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
+        # SELECT INTERVAL X MINUTE
+        ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
+        ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
+        ("0-0 0 0:1:0", relativedelta(minutes=1)),
+        ("0-0 0 0:59:0", relativedelta(minutes=59)),
+        # SELECT INTERVAL X SECOND
+        ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
+        ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
+        ("0-0 0 0:0:1", relativedelta(seconds=1)),
+        ("0-0 0 0:0:59", relativedelta(seconds=59)),
+        # SELECT (INTERVAL -1 SECOND) / 1000000
+        ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
+        ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
+        ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
+        ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
+        ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
+        # Test with multiple digits in each section.
+        (
+            "32-11 45 67:16:23.987654",
+            relativedelta(
+                years=32,
+                months=11,
+                days=45,
+                hours=67,
+                minutes=16,
+                seconds=23,
+                microseconds=987654,
+            ),
+        ),
+        (
+            "-32-11 -45 -67:16:23.987654",
+            relativedelta(
+                years=-32,
+                months=-11,
+                days=-45,
+                hours=-67,
+                minutes=-16,
+                seconds=-23,
+                microseconds=-987654,
+            ),
+        ),
+        # Test with mixed +/- sections.
+        (
+            "9999-9 -999999 9999999:59:59.999999",
+            relativedelta(
+                years=9999,
+                months=9,
+                days=-999999,
+                hours=9999999,
+                minutes=59,
+                seconds=59,
+                microseconds=999999,
+            ),
+        ),
+        # Test with fraction that is not microseconds.
+        ("0-0 0 0:0:42.", relativedelta(seconds=42)),
+        ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
+        ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
+        ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
+        ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
+        # Fractional seconds can cause rounding problems if cast to float. See:
+        # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
+        ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
+        (
+            "0-0 0 01:01:01.010101",
+            relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
+        ),
+        (
+            "0-0 0 09:09:09.090909",
+            relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
+        ),
+        (
+            "0-0 0 11:11:11.111111",
+            relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
+        ),
+        (
+            "0-0 0 19:16:23.987654",
+            relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
+        ),
+        # Nanoseconds are not expected, but should not cause error.
+        ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
+        ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
+    ),
+)
+def test_w_string_values(mut, value, expected):
+    """Each interval string parses to the expected ``relativedelta``."""
+    got = mut._interval_from_json(value, create_field())
+    assert got == expected

Original file line number	Diff line number	Diff line change
`@@ -366,8 +366,9 @@`
`366`	`366`	`"grpc": ("https://grpc.github.io/grpc/python/", None),`
`367`	`367`	`"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),`
`368`	`368`	`"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),`
`369`		`- "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),`
	`369`	`+ "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),`
`370`	`370`	`"geopandas": ("https://geopandas.org/", None),`
	`371`	`+ "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None),`
`371`	`372`	`}`
`372`	`373`
`373`	`374`
Original file line number	Diff line number	Diff line change
`@@ -42,6 +42,7 @@`
`42`	`42`	`"google-resumable-media >= 0.6.0, < 3.0dev",`
`43`	`43`	`"packaging >= 14.3",`
`44`	`44`	`"protobuf >= 3.12.0",`
	`45`	`+ "python-dateutil >= 2.7.2, <3.0dev",`
`45`	`46`	`"requests >= 2.18.0, < 3.0.0dev",`
`46`	`47`	`]`
`47`	`48`	`extras = {`