Skip to content

Commit e37380a

Browse files
tswast, gcf-owl-bot[bot], plamut
authored
feat: add support for INTERVAL data type to list_rows (#840)
* test: refactor `list_rows` tests and add test for scalars * WIP: INTERVAL support * feat: add support for INTERVAL data type to `list_rows` * fix relativedelta construction for non-microseconds * WIP: support INTERVAL query params * remove dead code * INTERVAL not supported in query parameters * revert query parameter changes * add validation error for interval * add unit tests for extreme intervals * add dateutil to intersphinx * use dictionary for intersphinx * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add test case for trailing . * explicit none * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * truncate nanoseconds * use \d group for digits * use \d for consistency Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: Peter Lamut <[email protected]>
1 parent 1f864fd commit e37380a

File tree

10 files changed

+222
-13
lines changed

10 files changed

+222
-13
lines changed

docs/conf.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,9 @@
366366
"grpc": ("https://grpc.github.io/grpc/python/", None),
367367
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
368368
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
369-
"pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None),
369+
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
370370
"geopandas": ("https://geopandas.org/", None),
371+
"pandas": ("https://pandas.pydata.org/pandas-docs/dev", None),
371372
}
372373

373374

google/cloud/bigquery/_helpers.py

+46-1
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
import decimal
2020
import math
2121
import re
22-
from typing import Any, Union
22+
from typing import Any, Optional, Union
2323

24+
from dateutil import relativedelta
2425
from google.cloud._helpers import UTC
2526
from google.cloud._helpers import _date_from_iso8601_date
2627
from google.cloud._helpers import _datetime_from_microseconds
@@ -45,6 +46,14 @@
4546
re.VERBOSE,
4647
)
4748

49+
# BigQuery sends INTERVAL data in "canonical format":
#     [-]Y-M [-]D [-]H:M:S[.F]
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
#
# One optional sign precedes the year-month pair and one precedes the
# hour:minute:second part; the day count captures its own sign. The
# "fraction" group always participates and is "" when no digits follow the
# seconds (including the "42." form with a bare trailing dot).
_INTERVAL_PATTERN = re.compile(
    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
    r"(?P<days>-?\d+) "
    # Anchor with ``\Z`` rather than ``$``: in Python ``$`` also matches just
    # before a trailing newline, which would let "...0:0:0\n" slip through as
    # if it were canonical.
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)\Z"
)
56+
4857
_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
4958
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
5059
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
@@ -191,6 +200,41 @@ def _int_from_json(value, field):
191200
return int(value)
192201

193202

203+
def _interval_from_json(
    value: Optional[str], field
) -> Optional[relativedelta.relativedelta]:
    """Parse a BigQuery INTERVAL cell into a ``relativedelta``.

    Args:
        value: The interval in BigQuery's canonical text form
            (``[-]Y-M [-]D [-]H:M:S[.F]``), or ``None``.
        field: Schema field for the column; it is passed to ``_not_null``
            and echoed in the ``TypeError`` message.

    Returns:
        ``None`` when ``_not_null(value, field)`` is false; otherwise the
        parsed interval. Fractional seconds are right-padded or truncated
        to exactly six digits, i.e. microsecond precision.

    Raises:
        TypeError: If ``value`` is ``None`` even though the null check
            passed (reported as a missing value for a REQUIRED field).
        ValueError: If ``value`` does not match ``_INTERVAL_PATTERN``.
    """
    if not _not_null(value, field):
        return None
    if value is None:
        raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")

    found = _INTERVAL_PATTERN.match(value)
    if found is None:
        raise ValueError(f"got interval: '{value}' with unexpected format")
    parts = found.groupdict()

    # One sign governs the year-month pair and another the whole
    # hour:minute:second(.fraction) part; the day count keeps its own sign
    # inside the captured text.
    ym_factor = -1 if parts["calendar_sign"] == "-" else 1
    calendar = {
        unit: ym_factor * int(parts[unit]) for unit in ("years", "months")
    }
    day_count = int(parts["days"])
    hms_factor = -1 if parts["time_sign"] == "-" else 1
    clock = {
        unit: hms_factor * int(parts[unit])
        for unit in ("hours", "minutes", "seconds")
    }

    # Stay in digit strings (no float round-trip): pad short fractions with
    # zeros and drop anything finer than microseconds.
    digits = parts["fraction"]
    if digits:
        micros = hms_factor * int(digits.ljust(6, "0")[:6])
    else:
        micros = 0

    return relativedelta.relativedelta(
        days=day_count,
        microseconds=micros,
        **calendar,
        **clock,
    )
236+
237+
194238
def _float_from_json(value, field):
195239
"""Coerce 'value' to a float, if set or not nullable."""
196240
if _not_null(value, field):
@@ -327,6 +371,7 @@ def _record_from_json(value, field):
327371
_CELLDATA_FROM_JSON = {
328372
"INTEGER": _int_from_json,
329373
"INT64": _int_from_json,
374+
"INTERVAL": _interval_from_json,
330375
"FLOAT": _float_from_json,
331376
"FLOAT64": _float_from_json,
332377
"NUMERIC": _decimal_from_json,

google/cloud/bigquery/enums.py

+1
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum):
254254
DATE = "DATE"
255255
TIME = "TIME"
256256
DATETIME = "DATETIME"
257+
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
257258

258259

259260
class SqlParameterScalarTypes:

owlbot.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@
9898
microgenerator=True,
9999
split_system_tests=True,
100100
intersphinx_dependencies={
101-
"pandas": "http://pandas.pydata.org/pandas-docs/stable/",
101+
"dateutil": "https://dateutil.readthedocs.io/en/latest/",
102102
"geopandas": "https://geopandas.org/",
103+
"pandas": "https://pandas.pydata.org/pandas-docs/dev",
103104
},
104105
)
105106

@@ -115,10 +116,6 @@
115116
# Include custom SNIPPETS_TESTS job for performance.
116117
# https://github.com/googleapis/python-bigquery/issues/191
117118
".kokoro/presubmit/presubmit.cfg",
118-
# Group all renovate PRs together. If this works well, remove this and
119-
# update the shared templates (possibly with configuration option to
120-
# py_library.)
121-
"renovate.json",
122119
],
123120
)
124121

renovate.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
{
22
"extends": [
3-
"config:base", "group:all", ":preserveSemverRanges"
3+
"config:base",
4+
"group:all",
5+
":preserveSemverRanges",
6+
":disableDependencyDashboard"
47
],
58
"ignorePaths": [".pre-commit-config.yaml"],
69
"pip_requirements": {

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"google-resumable-media >= 0.6.0, < 3.0dev",
4343
"packaging >= 14.3",
4444
"protobuf >= 3.12.0",
45+
"python-dateutil >= 2.7.2, <3.0dev",
4546
"requests >= 2.18.0, < 3.0.0dev",
4647
]
4748
extras = {

testing/constraints-3.6.txt

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pandas==0.24.2
1818
proto-plus==1.10.0
1919
protobuf==3.12.0
2020
pyarrow==3.0.0
21+
python-dateutil==2.7.2
2122
requests==2.18.0
2223
Shapely==1.6.0
2324
six==1.13.0

tests/system/test_client.py

-5
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,6 @@
3737
except ImportError: # pragma: NO COVER
3838
bigquery_storage = None
3939

40-
try:
41-
import fastavro # to parse BQ storage client results
42-
except ImportError: # pragma: NO COVER
43-
fastavro = None
44-
4540
try:
4641
import pyarrow
4742
import pyarrow.types

tests/system/test_list_rows.py

+8
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import datetime
1616
import decimal
1717

18+
from dateutil import relativedelta
19+
1820
from google.cloud import bigquery
1921
from google.cloud.bigquery import enums
2022

@@ -64,6 +66,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str)
6466
assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
6567
assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
6668
assert row["int64_col"] == 123456789
69+
assert row["interval_col"] == relativedelta.relativedelta(
70+
years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456
71+
)
6772
assert row["numeric_col"] == decimal.Decimal("1.23456789")
6873
assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
6974
assert row["float64_col"] == 1.25
@@ -95,6 +100,9 @@ def test_list_rows_scalars_extreme(
95100
assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
96101
assert row["geography_col"] == "POINT(-135 90)"
97102
assert row["int64_col"] == 9223372036854775807
103+
assert row["interval_col"] == relativedelta.relativedelta(
104+
years=-10000, days=-3660000, hours=-87840000
105+
)
98106
assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
99107
assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
100108
assert row["float64_col"] == float("Inf")

tests/unit/helpers/test_from_json.py

+157
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from dateutil.relativedelta import relativedelta
16+
import pytest
17+
18+
from google.cloud.bigquery.schema import SchemaField
19+
20+
21+
def create_field(mode="NULLABLE", type_="IGNORED"):
    """Build a ``SchemaField`` named "test_field" with the given mode and type."""
    field = SchemaField("test_field", type_, mode=mode)
    return field
23+
24+
25+
@pytest.fixture
def mut():
    """Provide the module under test, imported only when the fixture runs."""
    import google.cloud.bigquery._helpers as module_under_test

    return module_under_test
30+
31+
32+
def test_interval_from_json_w_none_nullable(mut):
    """A ``None`` value for a NULLABLE field converts to ``None``."""
    nullable_field = create_field()
    assert mut._interval_from_json(None, nullable_field) is None
35+
36+
37+
def test_interval_from_json_w_none_required(mut):
    """A ``None`` value for a REQUIRED field raises ``TypeError``."""
    required_field = create_field(mode="REQUIRED")
    with pytest.raises(TypeError):
        mut._interval_from_json(None, required_field)
40+
41+
42+
def test_interval_from_json_w_invalid_format(mut):
    """A non-interval string raises ``ValueError`` that quotes the bad input."""
    bad_value = "NOT_AN_INTERVAL"
    nullable_field = create_field()
    with pytest.raises(ValueError, match=bad_value):
        mut._interval_from_json(bad_value, nullable_field)
45+
46+
47+
# (canonical INTERVAL string, expected relativedelta) pairs.
_INTERVAL_STRING_CASES = (
    ("0-0 0 0:0:0", relativedelta()),
    # Years only: SELECT INTERVAL X YEAR
    ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
    ("-1-0 0 0:0:0", relativedelta(years=-1)),
    ("1-0 0 0:0:0", relativedelta(years=1)),
    ("10000-0 0 0:0:0", relativedelta(years=10000)),
    # Months only: SELECT INTERVAL X MONTH
    ("-0-11 0 0:0:0", relativedelta(months=-11)),
    ("-0-1 0 0:0:0", relativedelta(months=-1)),
    ("0-1 0 0:0:0", relativedelta(months=1)),
    ("0-11 0 0:0:0", relativedelta(months=11)),
    # Days only: SELECT INTERVAL X DAY
    ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
    ("0-0 -1 0:0:0", relativedelta(days=-1)),
    ("0-0 1 0:0:0", relativedelta(days=1)),
    ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
    # Hours only: SELECT INTERVAL X HOUR
    ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
    ("0-0 0 -1:0:0", relativedelta(hours=-1)),
    ("0-0 0 1:0:0", relativedelta(hours=1)),
    ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
    # Minutes only: SELECT INTERVAL X MINUTE
    ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
    ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
    ("0-0 0 0:1:0", relativedelta(minutes=1)),
    ("0-0 0 0:59:0", relativedelta(minutes=59)),
    # Seconds only: SELECT INTERVAL X SECOND
    ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
    ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
    ("0-0 0 0:0:1", relativedelta(seconds=1)),
    ("0-0 0 0:0:59", relativedelta(seconds=59)),
    # Sub-second values: SELECT (INTERVAL -1 SECOND) / 1000000
    ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
    ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
    ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
    ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
    ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
    # Several digits in every section, all positive then all negative.
    (
        "32-11 45 67:16:23.987654",
        relativedelta(
            years=32,
            months=11,
            days=45,
            hours=67,
            minutes=16,
            seconds=23,
            microseconds=987654,
        ),
    ),
    (
        "-32-11 -45 -67:16:23.987654",
        relativedelta(
            years=-32,
            months=-11,
            days=-45,
            hours=-67,
            minutes=-16,
            seconds=-23,
            microseconds=-987654,
        ),
    ),
    # Sections with different signs.
    (
        "9999-9 -999999 9999999:59:59.999999",
        relativedelta(
            years=9999,
            months=9,
            days=-999999,
            hours=9999999,
            minutes=59,
            seconds=59,
            microseconds=999999,
        ),
    ),
    # Fractions with fewer than six digits (including a bare trailing dot).
    ("0-0 0 0:0:42.", relativedelta(seconds=42)),
    ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
    ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
    ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
    ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
    # Fractional seconds can cause rounding problems if cast to float. See:
    # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
    ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
    (
        "0-0 0 01:01:01.010101",
        relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
    ),
    (
        "0-0 0 09:09:09.090909",
        relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
    ),
    (
        "0-0 0 11:11:11.111111",
        relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
    ),
    (
        "0-0 0 19:16:23.987654",
        relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
    ),
    # Nanoseconds are not expected, but must be truncated rather than fail.
    ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
    ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
)


@pytest.mark.parametrize(("value", "expected"), _INTERVAL_STRING_CASES)
def test_w_string_values(mut, value, expected):
    """Each canonical INTERVAL string parses to the expected ``relativedelta``."""
    nullable_field = create_field()
    assert mut._interval_from_json(value, nullable_field) == expected

0 commit comments

Comments
 (0)