Skip to content

Commit 3ddbc66

Browse files
committed
test: add tests for arrays in DataFrames
1 parent 9749e28 commit 3ddbc66

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed

tests/unit/test_client.py

+128
Original file line numberDiff line numberDiff line change
@@ -7307,6 +7307,134 @@ def test_load_table_from_dataframe_struct_fields(self):
73077307
assert sent_config.source_format == job.SourceFormat.PARQUET
73087308
assert sent_config.schema == schema
73097309

7310+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_array_fields(self):
    """Uploading a DataFrame with a REPEATED column keeps the explicit schema.

    Regression test for:
    https://github.com/googleapis/python-bigquery/issues/19
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    # One row: a scalar float plus an array-valued cell.
    rows = [(3.14, [1, 2])]
    df = pandas.DataFrame(data=rows, columns=["float_column", "array_column"])

    # Explicit schema: the array column is declared as REPEATED INTEGER.
    schema = [
        SchemaField("float_column", "FLOAT"),
        SchemaField("array_column", "INTEGER", mode="REPEATED"),
    ]
    job_config = job.LoadJobConfig(schema=schema)

    patch_load_file = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    # The table does not exist yet, so schema cannot come from the backend.
    patch_get_table = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=google.api_core.exceptions.NotFound("Table not found"),
    )

    with patch_load_file as mock_load_file, patch_get_table:
        client.load_table_from_dataframe(
            df,
            self.TABLE_REF,
            job_config=job_config,
            location=self.LOCATION,
        )

    mock_load_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        size=mock.ANY,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # The serialized upload must be parquet and carry the user's schema intact.
    posted_config = mock_load_file.mock_calls[0][2]["job_config"]
    assert posted_config.source_format == job.SourceFormat.PARQUET
    assert posted_config.schema == schema
7374+
7375+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
    """Uploading a DataFrame with a REPEATED column auto-detects the schema.

    Regression test for:
    https://github.com/googleapis/python-bigquery/issues/19
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    # One row: a scalar float plus an array-valued cell.
    rows = [(3.14, [1, 2])]
    df = pandas.DataFrame(data=rows, columns=["float_column", "array_column"])

    # No job_config is passed, so the client must infer this schema itself;
    # note the inferred array element type is INT64 (vs. the alias INTEGER).
    expected_schema = [
        SchemaField("float_column", "FLOAT"),
        SchemaField("array_column", "INT64", mode="REPEATED"),
    ]

    patch_load_file = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    # The table does not exist yet, so schema cannot come from the backend.
    patch_get_table = mock.patch(
        "google.cloud.bigquery.client.Client.get_table",
        autospec=True,
        side_effect=google.api_core.exceptions.NotFound("Table not found"),
    )

    with patch_load_file as mock_load_file, patch_get_table:
        client.load_table_from_dataframe(
            df,
            self.TABLE_REF,
            location=self.LOCATION,
        )

    mock_load_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        num_retries=_DEFAULT_NUM_RETRIES,
        rewind=True,
        size=mock.ANY,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=self.LOCATION,
        project=None,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # The serialized upload must be parquet with the auto-detected schema.
    posted_config = mock_load_file.mock_calls[0][2]["job_config"]
    assert posted_config.source_format == job.SourceFormat.PARQUET
    assert posted_config.schema == expected_schema
7437+
73107438
@unittest.skipIf(pandas is None, "Requires `pandas`")
73117439
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
73127440
def test_load_table_from_dataframe_w_partial_schema(self):

0 commit comments

Comments
 (0)