From f665f4931553c791e0d2e139c17943e508c8f552 Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 5 Jul 2017 19:59:23 -0500 Subject: [PATCH 1/4] BUG: kind parameter on categorical argsort --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/categorical.py | 4 ++-- pandas/core/sorting.py | 2 +- pandas/tests/frame/test_sorting.py | 9 +++++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 31125db0f34d4..1c2826d23844e 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -120,6 +120,7 @@ Categorical ^^^^^^^^^^^ - Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`) +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) Other ^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 796b2696af9ce..afae11163b0dc 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1288,7 +1288,7 @@ def check_for_ordered(self, op): "you can use .as_ordered() to change the " "Categorical to an ordered one\n".format(op=op)) - def argsort(self, ascending=True, *args, **kwargs): + def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): """ Returns the indices that would sort the Categorical instance if 'sort_values' was called. 
This function is implemented to provide @@ -1309,7 +1309,7 @@ def argsort(self, ascending=True, *args, **kwargs): numpy.ndarray.argsort """ ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) - result = np.argsort(self._codes.copy(), **kwargs) + result = np.argsort(self._codes.copy(), kind=kind, **kwargs) if not ascending: result = result[::-1] return result diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 69b427df981b7..10b80cbc3483d 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -233,7 +233,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): # specially handle Categorical if is_categorical_dtype(items): - return items.argsort(ascending=ascending) + return items.argsort(ascending=ascending, kind=kind) items = np.asanyarray(items) idx = np.arange(len(items)) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 98f7f82c0ace7..891c94b59074a 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -238,6 +238,15 @@ def test_stable_descending_multicolumn_sort(self): kind='mergesort') assert_frame_equal(sorted_df, expected) + def test_stable_categorial(self): + # GH 16793 + df = DataFrame({ + 'x': pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True) + }) + expected = df.copy() + sorted_df = df.sort_values('x', kind='mergesort') + assert_frame_equal(sorted_df, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column From 7c8810f6250f2b8825ec671996ab6c96792f975d Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 6 Jul 2017 17:22:45 -0500 Subject: [PATCH 2/4] fix validation logic --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/compat/numpy/function.py | 5 ++--- pandas/tests/test_categorical.py | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 3d1bed2c9f1a9..2f5b108f28fc5 
100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -92,6 +92,7 @@ Numeric Categorical ^^^^^^^^^^^ +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) Other ^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index a324bf94171ce..5e80cc8992f4d 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -102,10 +102,9 @@ def validate_argmax_with_skipna(skipna, args, kwargs): ARGSORT_DEFAULTS = OrderedDict() ARGSORT_DEFAULTS['axis'] = -1 -ARGSORT_DEFAULTS['kind'] = 'quicksort' ARGSORT_DEFAULTS['order'] = None validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', - max_fname_arg_count=0, method='both') + max_fname_arg_count=3, method='both') def validate_argsort_with_ascending(ascending, args, kwargs): @@ -121,7 +120,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): args = (ascending,) + args ascending = True - validate_argsort(args, kwargs, max_fname_arg_count=1) + validate_argsort(args, kwargs, max_fname_arg_count=3) return ascending diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 92177ca07d835..667b26c24c662 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -585,9 +585,8 @@ def test_numpy_argsort(self): tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) - msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, kind='mergesort') + tm.assert_numpy_array_equal(np.argsort(c, kind='mergesort'), expected, + check_dtype=False) msg = "the 'axis' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.argsort, From 9c47243be57fb246b8f813adbfc98cf147c6388f Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 6 Jul 2017 17:27:12 -0500 Subject: [PATCH 3/4] whatsnew deletion --- doc/source/whatsnew/v0.20.2.txt | 1 - 1 file changed, 1 deletion(-) diff 
--git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 1c2826d23844e..31125db0f34d4 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -120,7 +120,6 @@ Categorical ^^^^^^^^^^^ - Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`) -- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) Other ^^^^^ From 89edf3578650970ea8821bd429ac5c0349c1c595 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 6 Jul 2017 19:26:17 -0500 Subject: [PATCH 4/4] add back validation not supporting kind --- pandas/compat/numpy/function.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 5e80cc8992f4d..ccbd3d9704e0c 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -102,9 +102,18 @@ def validate_argmax_with_skipna(skipna, args, kwargs): ARGSORT_DEFAULTS = OrderedDict() ARGSORT_DEFAULTS['axis'] = -1 +ARGSORT_DEFAULTS['kind'] = 'quicksort' ARGSORT_DEFAULTS['order'] = None validate_argsort = CompatValidator(ARGSORT_DEFAULTS, fname='argsort', - max_fname_arg_count=3, method='both') + max_fname_arg_count=0, method='both') + +# two different signatures of argsort, this second validation +# for when the `kind` param is supported +ARGSORT_DEFAULTS_KIND = OrderedDict() +ARGSORT_DEFAULTS_KIND['axis'] = -1 +ARGSORT_DEFAULTS_KIND['order'] = None +validate_argsort_kind = CompatValidator(ARGSORT_DEFAULTS_KIND, fname='argsort', + max_fname_arg_count=0, method='both') def validate_argsort_with_ascending(ascending, args, kwargs): @@ -120,7 +129,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs): args = (ascending,) + args ascending = True - validate_argsort(args, kwargs, max_fname_arg_count=3) + validate_argsort_kind(args, kwargs, max_fname_arg_count=3) return ascending