diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py
index cf495e00..45e474b2 100644
--- a/pandas_gbq/load.py
+++ b/pandas_gbq/load.py
@@ -99,7 +99,16 @@ def cast_dataframe_for_parquet(
                 errors="ignore",
             )
         elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
-            cast_column = dataframe[column_name].map(decimal.Decimal)
+            # decimal.Decimal does not support `None` or `pandas.NA` input, add
+            # support here.
+            # https://github.com/googleapis/python-bigquery-pandas/issues/719
+            def convert(x):
+                if pandas.isna(x):  # true for `None` and `pandas.NA`
+                    return decimal.Decimal("NaN")
+                else:
+                    return decimal.Decimal(x)
+
+            cast_column = dataframe[column_name].map(convert)
         else:
             cast_column = None
 
diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py
index 5f38d244..45c73533 100644
--- a/tests/unit/test_load.py
+++ b/tests/unit/test_load.py
@@ -369,3 +369,22 @@ def test_cast_dataframe_for_parquet_w_null_fields():
     schema = {"fields": None}
     result = load.cast_dataframe_for_parquet(dataframe, schema)
     pandas.testing.assert_frame_equal(result, expected)
+
+
+# Verifies null numerics are properly handled
+# https://github.com/googleapis/python-bigquery-pandas/issues/719
+def test_cast_dataframe_for_parquet_w_null_numerics():
+    from decimal import Decimal
+
+    nans = pandas.Series([Decimal("3.14"), Decimal("nan"), None, pandas.NA])
+    dataframe = pandas.DataFrame({"A": nans})
+
+    schema = {"fields": [{"name": "A", "type": "BIGNUMERIC"}]}
+    result = load.cast_dataframe_for_parquet(dataframe, schema)
+
+    # pandas.testing.assert_frame_equal() doesn't distinguish Decimal("NaN")
+    # vs. None, verify Decimal("NaN") directly.
+    # https://github.com/pandas-dev/pandas/issues/18463
+    assert result["A"][1].is_nan()
+    assert result["A"][2].is_nan()
+    assert result["A"][3].is_nan()