From 167bfe2df69806405c4f4dde20bb227c3b75f7f1 Mon Sep 17 00:00:00 2001 From: Yi Date: Sun, 23 Mar 2025 15:52:27 -0400 Subject: [PATCH 1/6] Add numpy.dtypes type check in to_json function --- .../src/vendored/ujson/python/objToJSON.c | 23 +++++++++++++++++++ .../_libs/src/vendored/ujson/python/ujson.c | 10 ++++++++ 2 files changed, 33 insertions(+) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8342dbcd1763d..2db77c4662e84 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -62,6 +62,7 @@ int object_is_series_type(PyObject *obj); int object_is_index_type(PyObject *obj); int object_is_nat_type(PyObject *obj); int object_is_na_type(PyObject *obj); +int object_is_ndtypes_type(PyObject *obj); typedef struct __NpyArrContext { PyObject *array; @@ -396,6 +397,24 @@ static const char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc, return outValue; } +static const char *PyNpyDtypesToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc, + size_t *len) { + PyObject *obj = (PyObject *)_obj; + PyObject *str = PyObject_Str(obj); + + if (str == NULL) { + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + + Py_ssize_t s_len; + char *outValue = (char *)PyUnicode_AsUTF8AndSize(str, &s_len); + *len = s_len; + Py_DECREF(str); + + return outValue; +} + //============================================================================= // Numpy array iteration functions //============================================================================= @@ -1583,6 +1602,10 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { } else if (object_is_na_type(obj)) { tc->type = JT_NULL; return; + } else if (object_is_ndtypes_type(obj)) { + tc->type = JT_UTF8; + pc->PyTypeToUTF8 = PyNpyDtypesToUTF8Callback; + return; } ISITERABLE: diff --git a/pandas/_libs/src/vendored/ujson/python/ujson.c 
b/pandas/_libs/src/vendored/ujson/python/ujson.c index 2ee084b9304f4..17cd4e024df01 100644 --- a/pandas/_libs/src/vendored/ujson/python/ujson.c +++ b/pandas/_libs/src/vendored/ujson/python/ujson.c @@ -211,6 +211,16 @@ int object_is_na_type(PyObject *obj) { } return result; } + +int object_is_ndtypes_type(PyObject * obj){ + PyObject * ndtype = (PyObject*)&PyArrayDescr_Type; + int result = PyObject_IsInstance(obj, ndtype); + if (result == -1) { + PyErr_Clear(); + return 0; + } + return result; +} #else /* Used in objToJSON.c */ int object_is_decimal_type(PyObject *obj) { From 25e7e8475f0854c551a7195a6a8c748c8d97f7a0 Mon Sep 17 00:00:00 2001 From: Yi Date: Sun, 23 Mar 2025 16:14:54 -0400 Subject: [PATCH 2/6] Add unit tests and whatsnew info --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/src/vendored/ujson/python/ujson.c | 4 ++-- pandas/tests/io/json/test_pandas.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bad06329c4bfa..c45959d475af4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -749,6 +749,7 @@ I/O - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) +- Bug in :meth:`to_json` raising ``OverflowError`` when converting a ``DataFrame.dtypes`` Series to JSON (:issue:`61170`) Period ^^^^^^ diff --git a/pandas/_libs/src/vendored/ujson/python/ujson.c b/pandas/_libs/src/vendored/ujson/python/ujson.c index 17cd4e024df01..d590e51f1fa44 100644 --- a/pandas/_libs/src/vendored/ujson/python/ujson.c +++ b/pandas/_libs/src/vendored/ujson/python/ujson.c @@ -212,8 +212,8 @@ int 
object_is_na_type(PyObject *obj) { return result; } -int object_is_ndtypes_type(PyObject * obj){ - PyObject * ndtype = (PyObject*)&PyArrayDescr_Type; +int object_is_ndtypes_type(PyObject *obj) { + PyObject *ndtype = (PyObject *)&PyArrayDescr_Type; int result = PyObject_IsInstance(obj, ndtype); if (result == -1) { PyErr_Clear(); diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 32eeb30de4b69..3423d2a0f4719 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2311,3 +2311,19 @@ def test_large_number(): ) expected = Series([9999999999999999]) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "dataframe, expected_json", + [ + (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), '{"A":"int64","B":"int64"}'), + ( + DataFrame({"X": [1.1, 2.2], "Y": ["a", "b"]}), + '{"X":"float64","Y":"object"}', + ), + ], +) +def test_dtypes_to_json(dataframe: DataFrame, expected_json): + # GH 61170 + dtypes_json = dataframe.dtypes.to_json() + assert json.loads(dtypes_json) == json.loads(expected_json) From cf7d95c35f5feb3662dd0c98ec23114684bacb1a Mon Sep 17 00:00:00 2001 From: Yi Date: Sun, 23 Mar 2025 20:57:00 -0400 Subject: [PATCH 3/6] Add GET_TC --- pandas/_libs/src/vendored/ujson/python/objToJSON.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 2db77c4662e84..2ab695e9ea02e 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -407,10 +407,11 @@ static const char *PyNpyDtypesToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc, return NULL; } + GET_TC(tc)->newObj = str; + Py_ssize_t s_len; char *outValue = (char *)PyUnicode_AsUTF8AndSize(str, &s_len); *len = s_len; - Py_DECREF(str); return outValue; } From 055dc9683e5aa6ec5ca1d281cba7b0cbda4ac5b7 Mon Sep 17 00:00:00 2001 From: Yi Date: Mon, 24 
Mar 2025 01:55:27 -0400 Subject: [PATCH 4/6] Change Unit test --- pandas/tests/io/json/test_pandas.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3423d2a0f4719..2121ee9bca3ee 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2314,16 +2314,13 @@ def test_large_number(): @pytest.mark.parametrize( - "dataframe, expected_json", + "df", [ - (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), '{"A":"int64","B":"int64"}'), - ( - DataFrame({"X": [1.1, 2.2], "Y": ["a", "b"]}), - '{"X":"float64","Y":"object"}', - ), + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + DataFrame({"X": [1.1, 2.2], "Y": ["a", "b"]}), ], ) -def test_dtypes_to_json(dataframe: DataFrame, expected_json): - # GH 61170 - dtypes_json = dataframe.dtypes.to_json() - assert json.loads(dtypes_json) == json.loads(expected_json) +def test_dtypes_to_json_consistency(df: DataFrame): + expected = df.dtypes.apply(str).to_json() + result = df.dtypes.to_json() + assert json.loads(result) == json.loads(expected) From 635b6e73e1f9b5a95a52fd5134bf4dd9777a0e41 Mon Sep 17 00:00:00 2001 From: Yi Date: Mon, 24 Mar 2025 01:59:04 -0400 Subject: [PATCH 5/6] Add Github issue ID --- pandas/tests/io/json/test_pandas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2121ee9bca3ee..8626ca0cce943 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2321,6 +2321,7 @@ def test_large_number(): ], ) def test_dtypes_to_json_consistency(df: DataFrame): + # GH 61170 expected = df.dtypes.apply(str).to_json() result = df.dtypes.to_json() assert json.loads(result) == json.loads(expected) From f6abdde33817fb3975c5e89fb2901eadecb0dc12 Mon Sep 17 00:00:00 2001 From: Yi Date: Mon, 24 Mar 2025 10:32:03 -0400 Subject: [PATCH 6/6] Modify unit test --- 
pandas/tests/io/json/test_pandas.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 8626ca0cce943..00cebd0a2248f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2324,4 +2324,8 @@ def test_dtypes_to_json_consistency(df: DataFrame): # GH 61170 expected = df.dtypes.apply(str).to_json() result = df.dtypes.to_json() - assert json.loads(result) == json.loads(expected) + result = json.loads(result) + for k in result: + if "name" in result[k]: + result[k] = result[k]["name"] + assert result == json.loads(expected)