From 1bf67cf0966a9674fd54cbae2f26f382fb392f5d Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Mon, 8 Oct 2018 11:30:49 +1100 Subject: [PATCH 1/7] By convention, tuples indicate multi-level lookups. This change fixes nlargest/smallest functionality for dataframes with MultiIndex columns. --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0f1eb12883fd5..df2da26685a16 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1161,7 +1161,7 @@ class SelectNFrame(SelectN): def __init__(self, obj, n, keep, columns): super(SelectNFrame, self).__init__(obj, n, keep) - if not is_list_like(columns): + if not is_list_like(columns) or isinstance(columns, tuple): columns = [columns] columns = list(columns) self.columns = columns From 1910ca4a7e175a5841a123b5b6236be68d2c3062 Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Mon, 8 Oct 2018 13:28:56 +1100 Subject: [PATCH 2/7] Fixed tests (tuples are reserved for multi-level lookups) --- pandas/tests/frame/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b83fba7e7b277..a37dcc8a11cb2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2153,7 +2153,7 @@ def test_n(self, df_strings, nselect_method, n, order): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('columns', [ - ('group', 'category_string'), ('group', 'string')]) + ['group', 'category_string'], ['group', 'string']]) def test_n_error(self, df_main_dtypes, nselect_method, columns): df = df_main_dtypes col = columns[1] From 06bfd0ad0fb7abba80b18b2c77757891ec914f05 Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Mon, 8 Oct 2018 14:32:10 +1100 Subject: [PATCH 3/7] Added test coverage for multi-level (tuple) lookups. 
--- pandas/tests/frame/test_analytics.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index a37dcc8a11cb2..818a02b7b6991 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2259,3 +2259,14 @@ def test_series_nat_conversion(self): df.rank() result = df tm.assert_frame_equal(result, expected) + + def test_multiindex_column_lookup(self): + df = pd.DataFrame( + columns=pd.MultiIndex.from_product([['x'], ['a', 'b']]), + data=[[0.33, 0.13], [0.86, 0.25], [0.25, 0.70], [0.85, 0.91]]) + pd.util.testing.assert_frame_equal( + df.nsmallest(3, ('x', 'a')), + df.iloc[[2, 0, 3]]) + pd.util.testing.assert_frame_equal( + df.nlargest(3, ('x', 'b')), + df.iloc[[3, 2, 1]]) From 5530b9a4428f87a8dee15e3c55fd1fa4156f35e0 Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Thu, 11 Oct 2018 11:52:39 +1100 Subject: [PATCH 4/7] Updated test: Added github issue id and used 'tm' instead of 'pd.util.testing' --- pandas/tests/frame/test_analytics.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 818a02b7b6991..ab4eaf02f38dd 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2261,12 +2261,18 @@ def test_series_nat_conversion(self): tm.assert_frame_equal(result, expected) def test_multiindex_column_lookup(self): + # Check whether tuples are correctly treated as multi-level lookups. 
+ # GH 23033 df = pd.DataFrame( columns=pd.MultiIndex.from_product([['x'], ['a', 'b']]), data=[[0.33, 0.13], [0.86, 0.25], [0.25, 0.70], [0.85, 0.91]]) - pd.util.testing.assert_frame_equal( - df.nsmallest(3, ('x', 'a')), - df.iloc[[2, 0, 3]]) - pd.util.testing.assert_frame_equal( - df.nlargest(3, ('x', 'b')), - df.iloc[[3, 2, 1]]) + + # nsmallest + result = df.nsmallest(3, ('x', 'a')) + expected = df.iloc[[2, 0, 3]] + tm.assert_frame_equal(result, expected) + + # nlargest + result = df.nlargest(3, ('x', 'b')) + expected = df.iloc[[3, 2, 1]] + tm.assert_frame_equal(result, expected) From d8abc0cecb9ba5c6ede27cc43120ad2f536a5a9a Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Thu, 11 Oct 2018 12:53:15 +1100 Subject: [PATCH 5/7] Added whatsnew note in Bug Fixes / Reshaping --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a41b0c9521f99..28d5dbab2769b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1208,6 +1208,7 @@ Sparse - Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) - Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`) - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) +- Fixed :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). 
Build Changes ^^^^^^^^^^^^^ From b65c6a0978765d39e41cc25ac4c0cf327b763c38 Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Thu, 11 Oct 2018 12:59:30 +1100 Subject: [PATCH 6/7] Fixed typo --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 28d5dbab2769b..e49892073c9ef 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1208,7 +1208,7 @@ Sparse - Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) - Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`) - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) -- Fixed :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). +- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). 
Build Changes ^^^^^^^^^^^^^ From f77f164b0ced6dcb4bf3e0d485dcc4feb08b312c Mon Sep 17 00:00:00 2001 From: Kristian Holsheimer Date: Wed, 24 Oct 2018 07:56:42 +1100 Subject: [PATCH 7/7] fixed whatsnew note: was in wrong section --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e49892073c9ef..a547edec2f3ce 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1194,6 +1194,7 @@ Reshaping - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue`22796`) - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`) +- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`). .. _whatsnew_0240.bug_fixes.sparse: @@ -1208,7 +1209,6 @@ Sparse - Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) - Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`) - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) -- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). Build Changes ^^^^^^^^^^^^^