diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ec6ad38bbc7cf..2f6d502901af1 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -246,6 +246,44 @@ source, you should no longer need to install Cython into your build environment Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_100.api_breaking.nat_sort: + +Changed sort position for ``NaT`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:attr:`NaT` will now sort at the *end* rather than the beginning in sorting functions (:issue:`29884`). +This matches the behavior in NumPy 1.18 and newer, which makes the ``NaT`` behavior consistent +with other missing values like :attr:`numpy.nan`. + +.. ipython:: python + + values = pd.Index(['2001', 'NaT', '2000'], dtype='datetime64[ns]') + +*pandas 0.25.x* + +.. code-block:: python + + >>> values.sort_values() + DatetimeIndex(['NaT', '2000-01-01', '2001-01-01'], dtype='datetime64[ns]', freq=None) + + >>> values.argsort() + array([1, 2, 0]) + + +*pandas 1.0.0* + +.. ipython:: python + + values.sort_values() + values.argsort() + +This affects all sorting functions on indexes, Series, DataFrames, and arrays. + + +.. note:: + + This change was made between pandas 1.0.0rc0 and pandas 1.0.0. + .. _whatsnew_100.api_breaking.MultiIndex._names: Avoid using names from ``MultiIndex.levels`` diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 70637026c278d..d9b0ee0fc0ba0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -710,7 +710,7 @@ def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) def _values_for_argsort(self): - return self._data + return self._data.view("M8[ns]") # ------------------------------------------------------------------ # Additional array methods diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1bfec9fbad0ed..ea89fb38e8977 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -191,10 +191,8 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_index = self.take(_as) return sorted_index, _as else: - # NB: using asi8 instead of _ndarray_values matters in numpy 1.18 - # because the treatment of NaT has been changed to put NaT last - # instead of first. - sorted_values = np.sort(self.asi8) + values = self._data + sorted_values = np.sort(values.view("M8[ns]")).view("i8") freq = self.freq if freq is not None and not is_period_dtype(self): @@ -224,6 +222,9 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): self, indices, axis, allow_fill, fill_value, **kwargs ) + def argsort(self, *args, **kwargs) -> np.ndarray: + return np.argsort(self._data, *args, **kwargs) + @Appender(_shared_docs["searchsorted"]) def searchsorted(self, value, side="left", sorter=None): if isinstance(value, str): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 87b825c8c27bd..329311643c7f8 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -795,3 +795,11 @@ def test_to_numpy_extra(array): assert result[0] == result[1] tm.assert_equal(array, original) + + +@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]", "Period[D]"]) +def test_argsort(dtype): + a = pd.array([2001, pd.NaT, 2000], dtype=dtype) + result = a.argsort() + expected = np.array([2, 0, 1]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index ecd4ace705e9e..1f59cf428748d 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -247,7 +247,7 @@ def test_order_with_freq(self, idx): ), ( [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], - [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ["2011-01-02", "2011-01-03", "2011-01-05", pd.NaT, pd.NaT], ), ], ) @@ -269,14 +269,17 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) ordered, indexer = index.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) - exp = np.array([0, 4, 3, 1, 2]) + if index.isna().any(): + exp = np.array([3, 1, 2, 0, 4]) + else: + exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0]) + exp = exp[::-1] tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None