Review fixes for GH 61074 (astype("category") implies ordered=False):
- only the bare string "category" triggers the new behaviour, so an explicitly
  requested CategoricalDtype(ordered=True) is still honoured;
- drop the test-file special case for the warning stacklevel in library code;
- restore the final assert_frame_equal in test_replace_value_category_type;
- keep test_astype_categorical_to_categorical parametrised on dtype_ordered.
The blob ids on the "index" lines are carried over unchanged from the
original patch and have not been recomputed.

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0c3f535df9ce2..a67579ce30a10 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -121,6 +121,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     PeriodDtype,
@@ -6454,8 +6455,21 @@ def astype(
 
         else:
             # else, only a single dtype is given
+            # GH 61074: the bare string "category" implies ordered=False.
+            # A CategoricalDtype instance also compares equal to "category",
+            # so require a str here to leave explicitly requested dtypes alone.
+            if isinstance(dtype, str) and dtype == "category":
+                if isinstance(self.dtype, CategoricalDtype) and self.dtype.ordered:
+                    warnings.warn(
+                        "The 'category' dtype is being set to ordered=False "
+                        "by default.",
+                        DeprecationWarning,
+                        stacklevel=find_stack_level(),
+                    )
+                dtype = CategoricalDtype(ordered=False)
+
             new_data = self._mgr.astype(dtype=dtype, errors=errors)
             res = self._constructor_from_mgr(new_data, axes=new_data.axes)
             return res.__finalize__(self, method="astype")
 
         # GH 33113: handle empty frame or series
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 9e302dc5f94ee..a25cae1f5bfa5 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1311,26 +1311,30 @@ def test_replace_value_category_type(self):
         expected = DataFrame(data=expected_dict).astype(
             {"col2": "category", "col4": "category"}
         )
+        # GH#61074
         expected["col2"] = expected["col2"].cat.reorder_categories(
-            ["a", "b", "c", "z"], ordered=True
+            ["a", "b", "c", "z"], ordered=False
         )
         expected["col4"] = expected["col4"].cat.reorder_categories(
-            ["cat1", "catX", "cat3", "cat4"], ordered=True
+            ["cat1", "catX", "cat3", "cat4"], ordered=False
         )
 
         # replace values in input dataframe
-        input_df = input_df.apply(
-            lambda x: x.astype("category").cat.rename_categories({"d": "z"})
-        )
-        input_df = input_df.apply(
-            lambda x: x.astype("category").cat.rename_categories({"obj1": "obj9"})
-        )
-        result = input_df.apply(
-            lambda x: x.astype("category").cat.rename_categories({"cat2": "catX"})
-        )
-
-        result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
-        tm.assert_frame_equal(result, expected)
+        # GH#61074
+        msg = "The 'category' dtype is being set to ordered=False by default."
+        for col in ["col2", "col4"]:
+            if input_df[col].dtype.ordered:
+                with tm.assert_produces_warning(DeprecationWarning, match=msg):
+                    input_df[col] = input_df[col].astype("category")
+
+        input_df["col5"] = input_df["col5"].astype("category")
+
+        input_df["col2"] = input_df["col2"].cat.rename_categories({"d": "z"})
+        input_df["col4"] = input_df["col4"].cat.rename_categories({"cat2": "catX"})
+        input_df["col5"] = input_df["col5"].cat.rename_categories({"obj1": "obj9"})
+
+        result = input_df.astype({"col5": "str"})
+        tm.assert_frame_equal(result, expected)
 
     def test_replace_dict_category_type(self):
         """
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index 4a7e204ee4161..620473c3add45 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -610,20 +610,30 @@ def test_astype_categoricaldtype(self):
     def test_astype_categorical_to_categorical(
         self, name, dtype_ordered, series_ordered
     ):
         # GH#10696, GH#18593
         s_data = list("abcaacbab")
         s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
         ser = Series(s_data, dtype=s_dtype, name=name)
 
+        # GH#61074: astype("category") on an ordered categorical warns and
+        # returns an unordered result; continue with the unordered series
+        if series_ordered:
+            msg = "The 'category' dtype is being set to ordered=False by default."
+            with tm.assert_produces_warning(DeprecationWarning, match=msg):
+                result = ser.astype("category")
+            s_dtype = CategoricalDtype(list("bac"), ordered=False)
+            ser = Series(s_data, dtype=s_dtype, name=name)
+            tm.assert_series_equal(result, ser)
+
         # unspecified categories
         dtype = CategoricalDtype(ordered=dtype_ordered)
         result = ser.astype(dtype)
         exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
         expected = Series(s_data, name=name, dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
 
         # different categories
         dtype = CategoricalDtype(list("adc"), dtype_ordered)
         result = ser.astype(dtype)
         expected = Series(s_data, name=name, dtype=dtype)
         tm.assert_series_equal(result, expected)