pandas-dev · MartinBraquet · May 19, 2025 · May 19, 2025 · May 19, 2025 · May 19, 2025
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -758,6 +758,9 @@ class NSort:
     params = ["first", "last", "all"]
     param_names = ["keep"]
 
+    def __init__(self):
+        self.df = None  # placeholder; ``setup`` assigns the benchmark frame
+
     def setup(self, keep):
         self.df = DataFrame(np.random.randn(100000, 3), columns=list("ABC"))
 
@@ -773,6 +776,12 @@ def time_nsmallest_one_column(self, keep):
     def time_nsmallest_two_columns(self, keep):
         self.df.nsmallest(100, ["A", "B"], keep=keep)
 
+    def time_nsorted_one_column(self, keep):
+        self.df.nsorted(100, "A", keep=keep, ascending=True)  # first 100 by "A"
+
+    def time_nsorted_two_columns(self, keep):  # "A" ascending, then "B" descending
+        self.df.nsorted(100, ["A", "B"], keep=keep, ascending=[True, False])
+
 
 class Describe:
     def setup(self):

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -77,6 +77,7 @@ Other enhancements
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
+- Added :meth:`DataFrame.nsorted` to select the first ``n`` rows ordered by one or more columns, each in its own ascending or descending order (:issue:`61166`)
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
 - Added support to read from Apache Iceberg tables with the new :func:`read_iceberg` function (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)

@@ -429,7 +429,7 @@ def closed(self) -> bool:
 SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
 NaPosition = Literal["first", "last"]
 
-# Arguments for nsmallest and nlargest
+# Arguments for nsorted, nsmallest and nlargest
 NsmallestNlargestKeep = Literal["first", "last", "all"]
 
 # quantile interpolation

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7447,6 +7447,160 @@ def value_counts(
 
         return counts
 
+    def nsorted(
+        self,
+        n: int,
+        columns: IndexLabel,
+        ascending: bool | Sequence[bool],
+        keep: NsmallestNlargestKeep = "first",
+    ) -> DataFrame:
+        """
+        Return the first `n` rows ordered by `columns` as set by `ascending`.
+
+        Each column is sorted in ascending or descending order according to
+        `ascending`. The columns that are not specified are returned as
+        well, but not used for ordering.
+
+        This method is equivalent to
+        ``df.sort_values(columns, ascending=ascending).head(n)``, but more
+        performant.
+
+        Parameters
+        ----------
+        n : int
+            Number of rows to return.
+        columns : label or list of labels
+            Column label(s) to order by.
+        ascending : bool or list of bools
+            Whether to sort in ascending or descending order.
+            If a list, must be the same length as `columns`.
+        keep : {'first', 'last', 'all'}, default 'first'
+            Where there are duplicate values:
+
+            - ``first`` : prioritize the first occurrence(s)
+            - ``last`` : prioritize the last occurrence(s)
+            - ``all`` : keep all the ties of the last item even if it means
+              selecting more than ``n`` items.
+
+        Returns
+        -------
+        DataFrame
+            The first `n` rows ordered by the given columns in the order given
+            in `ascending`.
+
+        See Also
+        --------
+        DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
+            descending order.
+        DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
+            ascending order.
+        DataFrame.sort_values : Sort DataFrame by the values.
+        DataFrame.head : Return the first `n` rows without re-ordering.
+
+        Notes
+        -----
+        This function cannot be used with all column types. For example, when
+        specifying columns with `object` or `category` dtypes, ``TypeError`` is
+        raised.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "population": [
+        ...             59000000,
+        ...             65000000,
+        ...             434000,
+        ...             434000,
+        ...             434000,
+        ...             337000,
+        ...             11300,
+        ...             11300,
+        ...             11300,
+        ...         ],
+        ...         "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311],
+        ...         "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"],
+        ...     },
+        ...     index=[
+        ...         "Italy",
+        ...         "France",
+        ...         "Malta",
+        ...         "Maldives",
+        ...         "Brunei",
+        ...         "Iceland",
+        ...         "Nauru",
+        ...         "Tuvalu",
+        ...         "Anguilla",
+        ...     ],
+        ... )
+        >>> df
+                  population      GDP alpha-2
+        Italy       59000000  1937894      IT
+        France      65000000  2583560      FR
+        Malta         434000    12011      MT
+        Maldives      434000     4520      MV
+        Brunei        434000    12128      BN
+        Iceland       337000    17036      IS
+        Nauru          11300      182      NR
+        Tuvalu         11300       38      TV
+        Anguilla       11300      311      AI
+
+        In the following example, we will use ``nsorted`` to select the three
+        rows having the largest values in column "population".
+
+        >>> df.nsorted(3, "population", ascending=False)
+                population      GDP alpha-2
+        France    65000000  2583560      FR
+        Italy     59000000  1937894      IT
+        Malta       434000    12011      MT
+
+        When using ``keep='last'``, ties are resolved in reverse order:
+
+        >>> df.nsorted(3, "population", ascending=False, keep="last")
+                population      GDP alpha-2
+        France    65000000  2583560      FR
+        Italy     59000000  1937894      IT
+        Brunei      434000    12128      BN
+
+        When using ``keep='all'``, the number of elements kept can go beyond ``n``
+        if there are duplicate values for the last element. All the
+        ties are kept:
+
+        >>> df.nsorted(3, "population", ascending=False, keep="all")
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Malta         434000    12011      MT
+        Maldives      434000     4520      MV
+        Brunei        434000    12128      BN
+
+        However, ``keep='all'`` does not guarantee ``n`` distinct values:
+
+        >>> df.nsorted(5, "population", ascending=False, keep="all")
+                  population      GDP alpha-2
+        France      65000000  2583560      FR
+        Italy       59000000  1937894      IT
+        Malta         434000    12011      MT
+        Maldives      434000     4520      MV
+        Brunei        434000    12128      BN
+
+        To order by the largest values in column "population" and break ties
+        according to the smallest values in column "GDP", we can specify
+        multiple columns and ascending orders like in the next example.
+
+        >>> df.nsorted(3, ["population", "GDP"], ascending=[False, True])
+                population      GDP alpha-2
+        France    65000000  2583560      FR
+        Italy     59000000  1937894      IT
+        Maldives      434000     4520      MV
+        """
+        return selectn.SelectNFrame(
+            self,
+            n=n,
+            keep=keep,
+            columns=columns,
+        ).nsorted(ascending=ascending)
+
     def nlargest(
         self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
     ) -> DataFrame:
@@ -7457,6 +7611,9 @@ def nlargest(
         descending order. The columns that are not specified are returned as
         well, but not used for ordering.
 
+        This method is a shorthand for
+        ``df.nsorted(n, columns, ascending=False, keep=keep)``.
+
         This method is equivalent to
         ``df.sort_values(columns, ascending=False).head(n)``, but more
         performant.
@@ -7485,6 +7642,8 @@ def nlargest(
         --------
         DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
             ascending order.
+        DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
+            the order given in `ascending`.
         DataFrame.sort_values : Sort DataFrame by the values.
         DataFrame.head : Return the first `n` rows without re-ordering.
 
@@ -7553,7 +7712,7 @@ def nlargest(
         Italy     59000000  1937894      IT
         Brunei      434000    12128      BN
 
-        When using ``keep='all'``, the number of element kept can go beyond ``n``
+        When using ``keep='all'``, the number of elements kept can go beyond ``n``
         if there are duplicate values for the smallest element, all the
         ties are kept:
 
@@ -7584,7 +7743,7 @@ def nlargest(
         Italy     59000000  1937894      IT
         Brunei      434000    12128      BN
         """
-        return selectn.SelectNFrame(self, n=n, keep=keep, columns=columns).nlargest()
+        return self.nsorted(n=n, columns=columns, ascending=False, keep=keep)
 
     def nsmallest(
         self, n: int, columns: IndexLabel, keep: NsmallestNlargestKeep = "first"
@@ -7596,6 +7755,9 @@ def nsmallest(
         ascending order. The columns that are not specified are returned as
         well, but not used for ordering.
 
+        This method is a shorthand for
+        ``df.nsorted(n, columns, ascending=True, keep=keep)``.
+
         This method is equivalent to
         ``df.sort_values(columns, ascending=True).head(n)``, but more
         performant.
@@ -7623,6 +7785,8 @@ def nsmallest(
         --------
         DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
             descending order.
+        DataFrame.nsorted : Return the first `n` rows ordered by `columns` in
+            the order given in `ascending`.
         DataFrame.sort_values : Sort DataFrame by the values.
         DataFrame.head : Return the first `n` rows without re-ordering.
 
@@ -7715,7 +7879,7 @@ def nsmallest(
         Anguilla       11300  311      AI
         Nauru         337000  182      NR
         """
-        return selectn.SelectNFrame(self, n=n, keep=keep, columns=columns).nsmallest()
+        return self.nsorted(n=n, columns=columns, ascending=True, keep=keep)
 
     def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame:
         """