
Commit e3704c3

test: refactor list_rows tests and add test for scalars (#829)
* test: refactor `list_rows` tests and add test for scalars
* fix JSON formatting
* add TODO for INTERVAL Arrow support
* format tests
1 parent: c44d45b

6 files changed: +181 −83 lines changed


tests/data/scalars.jsonl

Lines changed: 2 additions & 2 deletions
@@ -1,2 +1,2 @@
-{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
-{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
+{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null}

tests/data/scalars_extreme.jsonl

Lines changed: 5 additions & 5 deletions
@@ -1,5 +1,5 @@
-{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
-{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
-{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
-{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
-{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
+{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
+{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
+{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
+{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null}

tests/data/scalars_schema.json

Lines changed: 32 additions & 22 deletions
@@ -1,33 +1,33 @@
 [
   {
     "mode": "NULLABLE",
-    "name": "timestamp_col",
-    "type": "TIMESTAMP"
+    "name": "bool_col",
+    "type": "BOOLEAN"
   },
   {
     "mode": "NULLABLE",
-    "name": "time_col",
-    "type": "TIME"
+    "name": "bignumeric_col",
+    "type": "BIGNUMERIC"
   },
   {
     "mode": "NULLABLE",
-    "name": "float64_col",
-    "type": "FLOAT"
+    "name": "bytes_col",
+    "type": "BYTES"
   },
   {
     "mode": "NULLABLE",
-    "name": "datetime_col",
-    "type": "DATETIME"
+    "name": "date_col",
+    "type": "DATE"
   },
   {
     "mode": "NULLABLE",
-    "name": "bignumeric_col",
-    "type": "BIGNUMERIC"
+    "name": "datetime_col",
+    "type": "DATETIME"
   },
   {
     "mode": "NULLABLE",
-    "name": "numeric_col",
-    "type": "NUMERIC"
+    "name": "float64_col",
+    "type": "FLOAT"
   },
   {
     "mode": "NULLABLE",
@@ -36,27 +36,37 @@
   },
   {
     "mode": "NULLABLE",
-    "name": "date_col",
-    "type": "DATE"
+    "name": "int64_col",
+    "type": "INTEGER"
   },
   {
     "mode": "NULLABLE",
-    "name": "string_col",
-    "type": "STRING"
+    "name": "interval_col",
+    "type": "INTERVAL"
   },
   {
     "mode": "NULLABLE",
-    "name": "bool_col",
-    "type": "BOOLEAN"
+    "name": "numeric_col",
+    "type": "NUMERIC"
+  },
+  {
+    "mode": "REQUIRED",
+    "name": "rowindex",
+    "type": "INTEGER"
   },
   {
     "mode": "NULLABLE",
-    "name": "bytes_col",
-    "type": "BYTES"
+    "name": "string_col",
+    "type": "STRING"
   },
   {
     "mode": "NULLABLE",
-    "name": "int64_col",
-    "type": "INTEGER"
+    "name": "time_col",
+    "type": "TIME"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "timestamp_col",
+    "type": "TIMESTAMP"
   }
 ]
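
The reordered schema file lists columns roughly alphabetically and adds a REQUIRED rowindex column, which the new tests use to sort rows deterministically. As a sketch, a file like this can be loaded with the client's schema helper (the relative path assumes the repository root as the working directory):

from google.cloud import bigquery

client = bigquery.Client()
# schema_from_json turns the JSON field definitions above into SchemaField objects.
schema = client.schema_from_json("tests/data/scalars_schema.json")
print([(field.name, field.field_type, field.mode) for field in schema])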

tests/system/test_arrow.py

Lines changed: 30 additions & 6 deletions
@@ -14,8 +14,14 @@
 
 """System tests for Arrow connector."""
 
+from typing import Optional
+
 import pytest
 
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
 pyarrow = pytest.importorskip(
     "pyarrow", minversion="3.0.0"
 )  # Needs decimal256 for BIGNUMERIC columns.
@@ -31,17 +37,35 @@
     ),
 )
 def test_list_rows_nullable_scalars_dtypes(
-    bigquery_client,
-    scalars_table,
-    scalars_extreme_table,
-    max_results,
-    scalars_table_name,
+    bigquery_client: bigquery.Client,
+    scalars_table: str,
+    scalars_extreme_table: str,
+    max_results: Optional[int],
+    scalars_table_name: str,
 ):
     table_id = scalars_table
     if scalars_table_name == "scalars_extreme_table":
         table_id = scalars_extreme_table
+
+    # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
+    # BigQuery Storage API and pyarrow.
+    schema = [
+        bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
+        bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
+        bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
+        bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
+        bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
+        bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
+        bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
+        bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
+        bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
+        bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
+        bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
+        bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
+    ]
+
     arrow_table = bigquery_client.list_rows(
-        table_id, max_results=max_results,
+        table_id, max_results=max_results, selected_fields=schema,
     ).to_arrow()
 
     schema = arrow_table.schema
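
Passing selected_fields is what lets the test avoid the new INTERVAL column until GH#836 is resolved: list_rows fetches only the named columns, so to_arrow never encounters a type pyarrow cannot yet represent. A minimal sketch of the same pattern outside the test suite (the table ID is a placeholder):

from google.cloud import bigquery
from google.cloud.bigquery import enums

client = bigquery.Client()
# Fetch only columns that convert cleanly to Arrow, skipping INTERVAL.
schema = [
    bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
    bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
]
arrow_table = client.list_rows(
    "my-project.my_dataset.scalars",  # placeholder table ID
    selected_fields=schema,
).to_arrow()
print(arrow_table.schema)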

tests/system/test_client.py

Lines changed: 0 additions & 48 deletions
@@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self):
         self.assertTrue(pyarrow.types.is_list(record_col[1].type))
         self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type))
 
-    def test_list_rows_empty_table(self):
-        from google.cloud.bigquery.table import RowIterator
-
-        dataset_id = _make_dataset_id("empty_table")
-        dataset = self.temp_dataset(dataset_id)
-        table_ref = dataset.table("empty_table")
-        table = Config.CLIENT.create_table(bigquery.Table(table_ref))
-
-        # It's a bit silly to list rows for an empty table, but this does
-        # happen as the result of a DDL query from an IPython magic command.
-        rows = Config.CLIENT.list_rows(table)
-        self.assertIsInstance(rows, RowIterator)
-        self.assertEqual(tuple(rows), ())
-
-    def test_list_rows_page_size(self):
-        from google.cloud.bigquery.job import SourceFormat
-        from google.cloud.bigquery.job import WriteDisposition
-
-        num_items = 7
-        page_size = 3
-        num_pages, num_last_page = divmod(num_items, page_size)
-
-        SF = bigquery.SchemaField
-        schema = [SF("string_col", "STRING", mode="NULLABLE")]
-        to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
-        rows = [json.dumps(row) for row in to_insert]
-        body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))
-
-        table_id = "test_table"
-        dataset = self.temp_dataset(_make_dataset_id("nested_df"))
-        table = dataset.table(table_id)
-        self.to_delete.insert(0, table)
-        job_config = bigquery.LoadJobConfig()
-        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
-        job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
-        job_config.schema = schema
-        # Load a table using a local JSON file from memory.
-        Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()
-
-        df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
-        pages = df.pages
-
-        for i in range(num_pages):
-            page = next(pages)
-            self.assertEqual(page.num_items, page_size)
-        page = next(pages)
-        self.assertEqual(page.num_items, num_last_page)
-
     def temp_dataset(self, dataset_id, location=None):
         project = Config.CLIENT.project
         dataset_ref = bigquery.DatasetReference(project, dataset_id)

tests/system/test_list_rows.py

Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import decimal
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
+def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str):
+    from google.cloud.bigquery.table import RowIterator
+
+    table = bigquery_client.create_table(table_id)
+
+    # It's a bit silly to list rows for an empty table, but this does
+    # happen as the result of a DDL query from an IPython magic command.
+    rows = bigquery_client.list_rows(table)
+    assert isinstance(rows, RowIterator)
+    assert tuple(rows) == ()
+
+
+def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str):
+    num_items = 7
+    page_size = 3
+    num_pages, num_last_page = divmod(num_items, page_size)
+
+    to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)]
+    bigquery_client.load_table_from_json(to_insert, table_id).result()
+
+    df = bigquery_client.list_rows(
+        table_id,
+        selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)],
+        page_size=page_size,
+    )
+    pages = df.pages
+
+    for i in range(num_pages):
+        page = next(pages)
+        assert page.num_items == page_size
+    page = next(pages)
+    assert page.num_items == num_last_page
+
+
+def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str):
+    rows = sorted(
+        bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"]
+    )
+    row = rows[0]
+    assert row["bool_col"]  # True
+    assert row["bytes_col"] == b"Hello, World!"
+    assert row["date_col"] == datetime.date(2021, 7, 21)
+    assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
+    assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
+    assert row["int64_col"] == 123456789
+    assert row["numeric_col"] == decimal.Decimal("1.23456789")
+    assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
+    assert row["float64_col"] == 1.25
+    assert row["string_col"] == "Hello, World!"
+    assert row["time_col"] == datetime.time(11, 41, 43, 76160)
+    assert row["timestamp_col"] == datetime.datetime(
+        2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc
+    )
+
+    nullrow = rows[1]
+    for column, value in nullrow.items():
+        if column == "rowindex":
+            assert value == 1
+        else:
+            assert value is None
+
+
+def test_list_rows_scalars_extreme(
+    bigquery_client: bigquery.Client, scalars_extreme_table: str
+):
+    rows = sorted(
+        bigquery_client.list_rows(scalars_extreme_table),
+        key=lambda row: row["rowindex"],
+    )
+    row = rows[0]
+    assert row["bool_col"]  # True
+    assert row["bytes_col"] == b"\r\n"
+    assert row["date_col"] == datetime.date(9999, 12, 31)
+    assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+    assert row["geography_col"] == "POINT(-135 90)"
+    assert row["int64_col"] == 9223372036854775807
+    assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
+    assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
+    assert row["float64_col"] == float("Inf")
+    assert row["string_col"] == "Hello, World"
+    assert row["time_col"] == datetime.time(23, 59, 59, 999999)
+    assert row["timestamp_col"] == datetime.datetime(
+        9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+    )
+
+    nullrow = rows[4]
+    for column, value in nullrow.items():
+        if column == "rowindex":
+            assert value == 4
+        else:
+            assert value is None
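
These tests lean on shared fixtures (bigquery_client, table_id, scalars_table, scalars_extreme_table) that are not part of this commit and presumably live in the suite's conftest.py. A minimal sketch of what the client fixture might look like (illustrative only; the real fixture may differ):

import pytest

from google.cloud import bigquery

@pytest.fixture(scope="session")
def bigquery_client():
    # Assumes application-default credentials are available.
    return bigquery.Client()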
