Commit 656d2fa

fix: error inserting DataFrame with REPEATED field (#925)

plamut and tswast authored
Co-authored-by: Tim Swast <[email protected]>

1 parent 8448922 · commit 656d2fa
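
The root cause: for a scalar cell, pandas.isna() returns a plain bool, but for a
list-like cell (which is how a REPEATED BigQuery field lands in a DataFrame) it
returns an array of bools, and using that array in an `if` raises a ValueError.
A minimal sketch of the pre-fix failure (values are illustrative):

    import pandas

    value = ["a", "b", "c"]        # one cell of a REPEATED column
    is_nan = pandas.isna(value)    # array([False, False, False])

    try:
        if is_nan:                 # the old check: `if pandas.isna(value):`
            pass
    except ValueError as exc:
        print(exc)  # "The truth value of an array with more than one element
                    #  is ambiguous. Use a.any() or a.all()"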

File tree

2 files changed: +56 −15

  google/cloud/bigquery/_pandas_helpers.py
  tests/unit/test__pandas_helpers.py

google/cloud/bigquery/_pandas_helpers.py

+7 −1

@@ -844,7 +844,13 @@ def dataframe_to_json_generator(dataframe):
         output = {}
         for column, value in zip(dataframe.columns, row):
             # Omit NaN values.
-            if pandas.isna(value):
+            is_nan = pandas.isna(value)
+
+            # isna() can also return an array-like of bools, but the latter's boolean
+            # value is ambiguous, hence an extra check. An array-like value is *not*
+            # considered a NaN, however.
+            if isinstance(is_nan, bool) and is_nan:
                 continue
             output[column] = value
+
         yield output
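
With the guard in place, only a plain bool returned by isna() can trigger the
skip; array-like results are kept, so repeated values pass through untouched.
A small sketch of the check's behavior (values are illustrative):

    import pandas

    isinstance(pandas.isna(None), bool)       # True, and isna is True -> cell omitted
    isinstance(pandas.isna(2), bool)          # True, but isna is False -> cell kept
    isinstance(pandas.isna([1, None]), bool)  # False (an ndarray) -> cell kept as-is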

tests/unit/test__pandas_helpers.py

+49 −14

@@ -821,6 +821,41 @@ def test_dataframe_to_json_generator(module_under_test):
     assert list(rows) == expected


+def test_dataframe_to_json_generator_repeated_field(module_under_test):
+    pytest.importorskip(
+        "pandas",
+        minversion=str(PANDAS_MINIUM_VERSION),
+        reason=(
+            f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` "
+            "which introduces pandas.NA"
+        ),
+    )
+
+    df_data = [
+        collections.OrderedDict(
+            [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")]
+        ),
+        collections.OrderedDict(
+            [
+                ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]),
+                ("not_repeated_col", "second"),
+            ]
+        ),
+    ]
+    dataframe = pandas.DataFrame(df_data)
+
+    rows = module_under_test.dataframe_to_json_generator(dataframe)
+
+    expected = [
+        {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"},
+        {
+            "repeated_col": ["a", "b", mock.sentinel.foo, "d"],
+            "not_repeated_col": "second",
+        },
+    ]
+    assert list(rows) == expected
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_list_columns_and_indexes_with_named_index(module_under_test):
     df_data = collections.OrderedDict(

@@ -882,7 +917,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
 def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
     df_data = collections.OrderedDict(
         [
-            ("str_column", [u"hello", u"world"]),
+            ("str_column", ["hello", "world"]),
             ("int_column", [42, 8]),
             ("bool_column", [True, False]),
         ]

@@ -1070,7 +1105,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
     ]

     dataframe = pandas.DataFrame(
-        {"field01": [u"hello", u"world"], "field02": [True, False]}
+        {"field01": ["hello", "world"], "field02": [True, False]}
     )

     arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema)

@@ -1139,8 +1174,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
 def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)},
-            {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)},
+            {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)},
+            {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
         ]
     )

@@ -1167,8 +1202,8 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test):
 def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)},
-            {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)},
+            {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)},
+            {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
         ]
     )

@@ -1197,8 +1232,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
 def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
-            {"struct_field": {"one": 2}, "status": u"FOO"},
-            {"struct_field": {"two": u"222"}, "status": u"BAR"},
+            {"struct_field": {"one": 2}, "status": "FOO"},
+            {"struct_field": {"two": "222"}, "status": "BAR"},
         ]
     )

@@ -1252,7 +1287,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
                 "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55),
                 "date_field": datetime.date(2005, 5, 31),
                 "bytes_field": b"some bytes",
-                "string_field": u"some characters",
+                "string_field": "some characters",
                 "numeric_field": decimal.Decimal("123.456"),
                 "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
             }

@@ -1312,13 +1347,13 @@ def test_augment_schema_type_detection_fails(module_under_test):
     dataframe = pandas.DataFrame(
         data=[
             {
-                "status": u"FOO",
+                "status": "FOO",
                 "struct_field": {"one": 1},
-                "struct_field_2": {"foo": u"123"},
+                "struct_field_2": {"foo": "123"},
             },
             {
-                "status": u"BAR",
-                "struct_field": {"two": u"111"},
+                "status": "BAR",
+                "struct_field": {"two": "111"},
                 "struct_field_2": {"bar": 27},
             },
         ]

@@ -1351,7 +1386,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
     ]

     dataframe = pandas.DataFrame(
-        {"field01": [u"hello", u"world"], "field02": [True, False]}
+        {"field01": ["hello", "world"], "field02": [True, False]}
     )

     write_table_patch = mock.patch.object(
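
End to end, this is the path exercised when streaming a DataFrame that has a
REPEATED column into a table. A hedged usage sketch (project, dataset, table,
and column names are invented; Client.insert_rows_from_dataframe is the public
entry point that drives dataframe_to_json_generator):

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()
    # Hypothetical table whose "tags" column is a REPEATED STRING field.
    table = client.get_table("my-project.my_dataset.my_table")

    df = pandas.DataFrame(
        {"name": ["alpha", "beta"], "tags": [["x", "y"], ["z"]]}
    )

    # Before this fix, the list-valued "tags" cells made pandas.isna() return
    # an array, and row serialization failed with the ambiguous-truth-value
    # ValueError. Now the rows stream through cleanly.
    errors = client.insert_rows_from_dataframe(table, df)
    assert errors == [[]]  # one chunk, no per-row insert errors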
