Skip to content

Commit 53a4683

Browse files
authored
fix: handle None when converting numerics to parquet (#768)
* fix: handle None when converting numerics to parquet
* lint and fix unit test
* add check for pandas.NA
1 parent 631532c commit 53a4683

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

pandas_gbq/load.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,16 @@ def cast_dataframe_for_parquet(
9999
errors="ignore",
100100
)
101101
elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
102-
cast_column = dataframe[column_name].map(decimal.Decimal)
102+
# decimal.Decimal does not accept `None` or `pandas.NA` as input; add
103+
# support here.
104+
# https://github.com/googleapis/python-bigquery-pandas/issues/719
105+
def convert(value):
    """Build a ``decimal.Decimal`` from ``value``, tolerating missing data.

    Returns ``decimal.Decimal("NaN")`` when ``pandas.isna(value)`` is true
    (which covers `None` and `pandas.NA`); otherwise returns
    ``decimal.Decimal(value)``.
    """
    # The missing-value check must come first: decimal.Decimal() itself
    # cannot be constructed from `None` or `pandas.NA`.
    return decimal.Decimal("NaN") if pandas.isna(value) else decimal.Decimal(value)
110+
111+
cast_column = dataframe[column_name].map(convert)
103112
else:
104113
cast_column = None
105114

tests/unit/test_load.py

+19
Original file line numberDiff line numberDiff line change
@@ -369,3 +369,22 @@ def test_cast_dataframe_for_parquet_w_null_fields():
369369
schema = {"fields": None}
370370
result = load.cast_dataframe_for_parquet(dataframe, schema)
371371
pandas.testing.assert_frame_equal(result, expected)
372+
373+
374+
# Verifies null numerics are properly handled
375+
# https://github.com/googleapis/python-bigquery-pandas/issues/719
376+
def test_cast_dataframe_for_parquet_w_null_numerics():
    """Null-like values in a BIGNUMERIC column are cast to ``Decimal("NaN")``.

    Regression test for
    https://github.com/googleapis/python-bigquery-pandas/issues/719
    """
    from decimal import Decimal

    nans = pandas.Series([Decimal("3.14"), Decimal("nan"), None, pandas.NA])
    dataframe = pandas.DataFrame({"A": nans})

    schema = {"fields": [{"name": "A", "type": "BIGNUMERIC"}]}
    result = load.cast_dataframe_for_parquet(dataframe, schema)

    # A real value must survive the cast unchanged; without this check a
    # conversion that turned every row into NaN would still pass.
    assert result["A"][0] == Decimal("3.14")

    # pandas.testing.assert_frame_equal() doesn't distinguish Decimal("NaN")
    # vs. None, verify Decimal("NaN") directly.
    # https://github.com/pandas-dev/pandas/issues/18463
    assert result["A"][1].is_nan()
    assert result["A"][2].is_nan()
    assert result["A"][3].is_nan()

0 commit comments

Comments
 (0)