From d828a44aaf4e3917e2d886576319a3d8c0511691 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Tue, 19 Sep 2017 19:44:36 -0500 Subject: [PATCH 1/3] Allow for dict-like categorical renaming --- doc/source/categorical.rst | 4 ++++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/categorical.py | 9 +++++++-- pandas/tests/test_categorical.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 65361886436d6..ff5e550ebd97f 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -206,6 +206,10 @@ by using the :func:`Categorical.rename_categories` method: s.cat.categories = ["Group %s" % g for g in s.cat.categories] s s.cat.rename_categories([1,2,3]) + s + # You can also pass a dict-like object to map the renaming + s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'}) + s .. note:: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a353544a4283..dfe7935f0ac4d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -115,6 +115,7 @@ Other Enhancements - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) +- :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories`, and only updates the categories found in that dict. (:issue:`17336`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index ddca93f07ad5e..4e308968afcd3 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -25,7 +25,8 @@ is_categorical_dtype, is_integer_dtype, is_bool, is_list_like, is_sequence, - is_scalar) + is_scalar, + is_dict_like) from pandas.core.common import is_null_slice, _maybe_box_datetimelike from pandas.core.algorithms import factorize, take_1d, unique1d @@ -824,7 +825,11 @@ def rename_categories(self, new_categories, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - cat.categories = new_categories + if is_dict_like(new_categories): + cat.categories = [new_categories.get(item, item) + for item in cat.categories] + else: + cat.categories = new_categories if not inplace: return cat diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index c361b430cfd8a..29c8ab9d12e70 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -983,6 +983,35 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + def test_rename_categories_dict(self): + # GH 17336 + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + # Test for inplace + res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}, + inplace=True) + assert res is None + + tm.assert_index_equal(cat.categories, expected) + # Test for dicts of smaller length + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'c': 3}) + expected = Index([1, 'b', 3, 'd']) + tm.assert_index_equal(res.categories, expected) + # Test for dicts with bigger length + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3, + 'd': 4, 'e': 5, 'f': 6}) + expected 
= Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + # Test for dicts with no items from old categories + cat = pd.Categorical(['a', 'b', 'c', 'd']) + res = cat.rename_categories({'f': 1, 'g': 3}) + expected = Index(['a', 'b', 'c', 'd']) + tm.assert_index_equal(res.categories, expected) + @pytest.mark.parametrize('codes, old, new, expected', [ ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]), ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]), From 0b056638fc6aa71950adf35e6eee514d3161596b Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Wed, 20 Sep 2017 10:50:34 -0500 Subject: [PATCH 2/3] Improve readibility --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/categorical.py | 1 + pandas/tests/test_categorical.py | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dfe7935f0ac4d..f440ca6e82828 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -115,7 +115,7 @@ Other Enhancements - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) -- :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories`, and only updates the categories found in that dict. (:issue:`17336`) +- :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 4e308968afcd3..1894df9f23e25 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -825,6 +825,7 @@ def rename_categories(self, new_categories, inplace=False): """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() + if is_dict_like(new_categories): cat.categories = [new_categories.get(item, item) for item in cat.categories] diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 29c8ab9d12e70..e6fa5d1af55be 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -989,26 +989,31 @@ def test_rename_categories_dict(self): res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}) expected = Index([4, 3, 2, 1]) tm.assert_index_equal(res.categories, expected) + # Test for inplace res = cat.rename_categories({'a': 4, 'b': 3, 'c': 2, 'd': 1}, inplace=True) assert res is None - tm.assert_index_equal(cat.categories, expected) + # Test for dicts of smaller length cat = pd.Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'a': 1, 'c': 3}) + expected = Index([1, 'b', 3, 'd']) tm.assert_index_equal(res.categories, expected) + # Test for dicts with bigger length cat = pd.Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}) expected = Index([1, 2, 3, 4]) tm.assert_index_equal(res.categories, expected) + # Test for dicts with no items from old categories cat = pd.Categorical(['a', 'b', 'c', 'd']) res = cat.rename_categories({'f': 1, 'g': 3}) + expected = Index(['a', 'b', 'c', 'd']) tm.assert_index_equal(res.categories, expected) From 886340889219cdf4ff781411e5a40ff10d736365 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Thu, 21 Sep 2017 15:51:38 -0500 Subject: [PATCH 3/3] Update docstring on rename_categories --- pandas/core/categorical.py | 13 +++++++------ 1 file changed, 7 
insertions(+), 6 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 1894df9f23e25..6f7eafe43dbbb 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -793,19 +793,20 @@ def set_categories(self, new_categories, ordered=None, rename=False, def rename_categories(self, new_categories, inplace=False): """ Renames categories. - The new categories has to be a list-like object. All items must be - unique and the number of items in the new categories must be the same - as the number of items in the old categories. + The new categories can be either a list-like or dict-like object. + If it is list-like, all items must be unique and the number of items + in the new categories must be the same as the number of items in the + old categories. Raises ------ ValueError - If the new categories do not have the same number of items than the - current categories or do not validate as categories + If new categories are list-like and do not have the same number of + items as the current categories or do not validate as categories Parameters ---------- - new_categories : Index-like + new_categories : Index-like or dict-like (>=0.21.0) The renamed categories. inplace : boolean (default: False) Whether or not to rename the categories inplace or return a copy of