
Commit 5fd840e

feat(bigquery): unit and system tests for dataframe with int column with NaN values (#39)
* feat(bigquery): add unit and system tests for int columns
* feat(bigquery): cosmetic changes
* feat(bigquery): use pkg_resources for comparison
* feat(bigquery): nit
1 parent 18eb9e8 commit 5fd840e
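
For context: the tests in this commit rely on pandas' nullable integer extension dtype, which keeps a column integer-typed when values are missing instead of coercing it to float64/NaN. A minimal sketch of that behavior (illustrative only, not part of the commit; assumes pandas >= 1.0.0):

import pandas

# With the default dtype, a missing value forces the column to float64 and NaN.
floaty = pandas.Series([1, 2, None, 4])

# With the nullable "Int64" extension dtype, the column stays integer-typed
# and the missing entry is represented as pandas.NA.
nullable = pandas.Series([1, 2, None, 4], dtype="Int64")

print(floaty.dtype, nullable.dtype)  # float64 Int64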

File tree

2 files changed: +160, -0 lines changed


tests/system.py

Lines changed: 64 additions & 0 deletions
@@ -31,6 +31,7 @@
 import psutil
 import pytest
 import pytz
+import pkg_resources
 
 try:
     from google.cloud import bigquery_storage_v1beta1
@@ -125,6 +126,9 @@
     (TooManyRequests, InternalServerError, ServiceUnavailable)
 )
 
+PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
+PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+
 
 def _has_rows(result):
     return len(result) > 0
@@ -742,6 +746,66 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
         )
         self.assertEqual(table.num_rows, 3)
 
+    @unittest.skipIf(
+        pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
+        "Only `pandas version >=1.0.0` is supported",
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
+        """Test that a DataFrame containing a column with None-type values and
+        int64 datatype can be uploaded if a BigQuery schema is specified.
+
+        https://github.com/googleapis/python-bigquery/issues/22
+        """
+
+        dataset_id = _make_dataset_id("bq_load_test")
+        self.temp_dataset(dataset_id)
+        table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
+            Config.CLIENT.project, dataset_id
+        )
+        table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),)
+        table = retry_403(Config.CLIENT.create_table)(
+            Table(table_id, schema=table_schema)
+        )
+        self.to_delete.insert(0, table)
+
+        df_data = collections.OrderedDict(
+            [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
+        )
+        dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+        load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
+        load_job.result()
+        table = Config.CLIENT.get_table(table_id)
+        self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
+        self.assertEqual(table.num_rows, 4)
+
+    @unittest.skipIf(
+        pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
+        "Only `pandas version >=1.0.0` is supported",
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
+        """Test that a DataFrame containing a column with None-type values and
+        int64 datatype can be uploaded without specifying a schema.
+
+        https://github.com/googleapis/python-bigquery/issues/22
+        """
+
+        dataset_id = _make_dataset_id("bq_load_test")
+        self.temp_dataset(dataset_id)
+        table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
+            Config.CLIENT.project, dataset_id
+        )
+        df_data = collections.OrderedDict(
+            [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
+        )
+        dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+        load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
+        load_job.result()
+        table = Config.CLIENT.get_table(table_id)
+        self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
+        self.assertEqual(table.num_rows, 4)
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):
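
The system tests above boil down to the following usage pattern. A minimal sketch (not part of the commit), assuming default application credentials, an installed pyarrow, and a placeholder table ID:

import pandas
from google.cloud import bigquery

client = bigquery.Client()
table_id = "my-project.my_dataset.int_column_with_nulls"  # placeholder

# "Int64" (capital I) is pandas' nullable integer dtype; None becomes pandas.NA
# instead of forcing the column to float64/NaN.
dataframe = pandas.DataFrame({"x": pandas.Series([1, 2, None, 4], dtype="Int64")})

load_job = client.load_table_from_dataframe(dataframe, table_id)
load_job.result()  # wait for the load job to complete

table = client.get_table(table_id)
print(table.schema)    # expect a NULLABLE INTEGER field named "x"
print(table.num_rows)  # expect 4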

tests/unit/test_client.py

Lines changed: 96 additions & 0 deletions
@@ -30,6 +30,7 @@
 from six.moves import http_client
 import pytest
 import pytz
+import pkg_resources
 
 try:
     import fastparquet
@@ -56,6 +57,9 @@
     bigquery_storage_v1beta1 = None
 from tests.unit.helpers import make_connection
 
+PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
+PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+
 
 def _make_credentials():
     import google.auth.credentials
@@ -6973,6 +6977,98 @@ def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self):
         ]
         assert matches, "A missing schema deprecation warning was not raised."
 
+    @unittest.skipIf(
+        pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
+        "Only `pandas version >=1.0.0` is supported",
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64")
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table",
+            autospec=True,
+            return_value=mock.Mock(schema=[SchemaField("x", "INT64", "NULLABLE")]),
+        )
+
+        with load_patch as load_table_from_file, get_table_patch:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == (
+            SchemaField("x", "INT64", "NULLABLE", None),
+        )
+
+    @unittest.skipIf(
+        pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
+        "Only `pandas version >=1.0.0` is supported",
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64")
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table",
+            autospec=True,
+            side_effect=google.api_core.exceptions.NotFound("Table not found"),
+        )
+
+        with load_patch as load_table_from_file, get_table_patch:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == (
+            SchemaField("x", "INT64", "NULLABLE", None),
+        )
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_struct_fields_error(self):
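
Both new unit tests are gated on the installed pandas version via pkg_resources, mirroring the module-level constants added near the top of the file. A short sketch of that gating pattern (illustrative; the test class and method names below are hypothetical):

import unittest
import pkg_resources

PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version


class TestNullableInt64Load(unittest.TestCase):  # hypothetical test case
    @unittest.skipIf(
        PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
        "Only `pandas version >=1.0.0` is supported",
    )
    def test_requires_modern_pandas(self):
        # Runs only when pandas >= 1.0.0 is installed; otherwise reported as skipped.
        self.assertTrue(True)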
