Skip to content

Commit 25ff472

Browse files
jorisvandenbossche authored and
TomAugspurger committed
API: change Index set ops sort=True -> sort=None (#25063)
1 parent d4b42c8 commit 25ff472

File tree

17 files changed

+465
-129
lines changed

17 files changed

+465
-129
lines changed

Diff for: doc/source/whatsnew/v0.24.1.rst

+32-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,40 @@ Whats New in 0.24.1 (February XX, 2019)
1515
These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog
1616
including other versions of pandas.
1717

18+
.. _whatsnew_0241.api:
19+
20+
API Changes
21+
~~~~~~~~~~~
22+
23+
Changing the ``sort`` parameter for :class:`Index` set operations
24+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
25+
26+
The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`).
27+
The default *behavior*, however, remains the same: the result is sorted, unless
28+
29+
1. ``self`` and ``other`` are identical
30+
2. ``self`` or ``other`` is empty
31+
3. ``self`` or ``other`` contains values that cannot be compared (a ``RuntimeWarning`` is issued).
32+
33+
This change will allow ``sort=True`` to mean "always sort" in a future release.
34+
35+
The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which
36+
would not sort the result when the values could not be compared.
37+
38+
The ``sort`` option for :meth:`Index.intersection` has changed in three ways.
39+
40+
1. The default has changed from ``True`` to ``False``, to restore the
41+
pandas 0.23.4 and earlier behavior of not sorting by default.
42+
2. The behavior of ``sort=True`` can now be obtained with ``sort=None``.
43+
This will sort the result only if the values in ``self`` and ``other``
44+
are not identical.
45+
3. The value ``sort=True`` is no longer allowed. A future version of pandas
46+
will properly support ``sort=True`` meaning "always sort".
47+
1848
.. _whatsnew_0241.regressions:
1949

2050
Fixed Regressions
21-
^^^^^^^^^^^^^^^^^
51+
~~~~~~~~~~~~~~~~~
2252

2353
- Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`)
2454
- Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`)
@@ -30,7 +60,7 @@ Fixed Regressions
3060
.. _whatsnew_0241.enhancements:
3161

3262
Enhancements
33-
^^^^^^^^^^^^
63+
~~~~~~~~~~~~
3464

3565

3666
.. _whatsnew_0241.bug_fixes:

Diff for: pandas/_libs/lib.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,11 @@ def fast_unique_multiple(list arrays, sort: bool=True):
233233
if val not in table:
234234
table[val] = stub
235235
uniques.append(val)
236-
if sort:
236+
if sort is None:
237237
try:
238238
uniques.sort()
239239
except Exception:
240+
# TODO: RuntimeWarning?
240241
pass
241242

242243
return uniques

Diff for: pandas/core/indexes/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
112112
elif intersect:
113113
index = indexes[0]
114114
for other in indexes[1:]:
115-
index = index.intersection(other, sort=sort)
115+
index = index.intersection(other)
116116
else:
117117
index = _union_indexes(indexes, sort=sort)
118118
index = ensure_index(index)

Diff for: pandas/core/indexes/base.py

+66-16
Original file line numberDiff line numberDiff line change
@@ -2245,18 +2245,37 @@ def _get_reconciled_name_object(self, other):
22452245
return self._shallow_copy(name=name)
22462246
return self
22472247

2248-
def union(self, other, sort=True):
2248+
def _validate_sort_keyword(self, sort):
2249+
if sort not in [None, False]:
2250+
raise ValueError("The 'sort' keyword only takes the values of "
2251+
"None or False; {0} was passed.".format(sort))
2252+
2253+
def union(self, other, sort=None):
22492254
"""
22502255
Form the union of two Index objects.
22512256
22522257
Parameters
22532258
----------
22542259
other : Index or array-like
2255-
sort : bool, default True
2256-
Sort the resulting index if possible
2260+
sort : False or None, default None
2261+
Whether to sort the resulting Index.
2262+
2263+
* None : Sort the result, except when
2264+
2265+
1. `self` and `other` are equal.
2266+
2. `self` or `other` has length 0.
2267+
3. Some values in `self` or `other` cannot be compared.
2268+
A RuntimeWarning is issued in this case.
2269+
2270+
* False : do not sort the result.
22572271
22582272
.. versionadded:: 0.24.0
22592273
2274+
.. versionchanged:: 0.24.1
2275+
2276+
Changed the default value from ``True`` to ``None``
2277+
(without change in behaviour).
2278+
22602279
Returns
22612280
-------
22622281
union : Index
@@ -2269,6 +2288,7 @@ def union(self, other, sort=True):
22692288
>>> idx1.union(idx2)
22702289
Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')
22712290
"""
2291+
self._validate_sort_keyword(sort)
22722292
self._assert_can_do_setop(other)
22732293
other = ensure_index(other)
22742294

@@ -2319,7 +2339,7 @@ def union(self, other, sort=True):
23192339
else:
23202340
result = lvals
23212341

2322-
if sort:
2342+
if sort is None:
23232343
try:
23242344
result = sorting.safe_sort(result)
23252345
except TypeError as e:
@@ -2342,14 +2362,19 @@ def intersection(self, other, sort=False):
23422362
Parameters
23432363
----------
23442364
other : Index or array-like
2345-
sort : bool, default False
2346-
Sort the resulting index if possible
2365+
sort : False or None, default False
2366+
Whether to sort the resulting index.
2367+
2368+
* False : do not sort the result.
2369+
* None : sort the result, except when `self` and `other` are equal
2370+
or when the values cannot be compared.
23472371
23482372
.. versionadded:: 0.24.0
23492373
23502374
.. versionchanged:: 0.24.1
23512375
2352-
Changed the default from ``True`` to ``False``.
2376+
Changed the default from ``True`` to ``False``, to match
2377+
the behaviour of 0.23.4 and earlier.
23532378
23542379
Returns
23552380
-------
@@ -2363,6 +2388,7 @@ def intersection(self, other, sort=False):
23632388
>>> idx1.intersection(idx2)
23642389
Int64Index([3, 4], dtype='int64')
23652390
"""
2391+
self._validate_sort_keyword(sort)
23662392
self._assert_can_do_setop(other)
23672393
other = ensure_index(other)
23682394

@@ -2402,7 +2428,7 @@ def intersection(self, other, sort=False):
24022428

24032429
taken = other.take(indexer)
24042430

2405-
if sort:
2431+
if sort is None:
24062432
taken = sorting.safe_sort(taken.values)
24072433
if self.name != other.name:
24082434
name = None
@@ -2415,7 +2441,7 @@ def intersection(self, other, sort=False):
24152441

24162442
return taken
24172443

2418-
def difference(self, other, sort=True):
2444+
def difference(self, other, sort=None):
24192445
"""
24202446
Return a new Index with elements from the index that are not in
24212447
`other`.
@@ -2425,11 +2451,22 @@ def difference(self, other, sort=True):
24252451
Parameters
24262452
----------
24272453
other : Index or array-like
2428-
sort : bool, default True
2429-
Sort the resulting index if possible
2454+
sort : False or None, default None
2455+
Whether to sort the resulting index. By default, pandas
2456+
attempts to sort the values, and any TypeError raised by
2457+
incomparable elements is caught.
2458+
2459+
* None : Attempt to sort the result, but catch any TypeErrors
2460+
from comparing incomparable elements.
2461+
* False : Do not sort the result.
24302462
24312463
.. versionadded:: 0.24.0
24322464
2465+
.. versionchanged:: 0.24.1
2466+
2467+
Changed the default value from ``True`` to ``None``
2468+
(without change in behaviour).
2469+
24332470
Returns
24342471
-------
24352472
difference : Index
@@ -2444,6 +2481,7 @@ def difference(self, other, sort=True):
24442481
>>> idx1.difference(idx2, sort=False)
24452482
Int64Index([2, 1], dtype='int64')
24462483
"""
2484+
self._validate_sort_keyword(sort)
24472485
self._assert_can_do_setop(other)
24482486

24492487
if self.equals(other):
@@ -2460,27 +2498,38 @@ def difference(self, other, sort=True):
24602498
label_diff = np.setdiff1d(np.arange(this.size), indexer,
24612499
assume_unique=True)
24622500
the_diff = this.values.take(label_diff)
2463-
if sort:
2501+
if sort is None:
24642502
try:
24652503
the_diff = sorting.safe_sort(the_diff)
24662504
except TypeError:
24672505
pass
24682506

24692507
return this._shallow_copy(the_diff, name=result_name, freq=None)
24702508

2471-
def symmetric_difference(self, other, result_name=None, sort=True):
2509+
def symmetric_difference(self, other, result_name=None, sort=None):
24722510
"""
24732511
Compute the symmetric difference of two Index objects.
24742512
24752513
Parameters
24762514
----------
24772515
other : Index or array-like
24782516
result_name : str
2479-
sort : bool, default True
2480-
Sort the resulting index if possible
2517+
sort : False or None, default None
2518+
Whether to sort the resulting index. By default, pandas
2519+
attempts to sort the values, and any TypeError raised by
2520+
incomparable elements is caught.
2521+
2522+
* None : Attempt to sort the result, but catch any TypeErrors
2523+
from comparing incomparable elements.
2524+
* False : Do not sort the result.
24812525
24822526
.. versionadded:: 0.24.0
24832527
2528+
.. versionchanged:: 0.24.1
2529+
2530+
Changed the default value from ``True`` to ``None``
2531+
(without change in behaviour).
2532+
24842533
Returns
24852534
-------
24862535
symmetric_difference : Index
@@ -2504,6 +2553,7 @@ def symmetric_difference(self, other, result_name=None, sort=True):
25042553
>>> idx1 ^ idx2
25052554
Int64Index([1, 5], dtype='int64')
25062555
"""
2556+
self._validate_sort_keyword(sort)
25072557
self._assert_can_do_setop(other)
25082558
other, result_name_update = self._convert_can_do_setop(other)
25092559
if result_name is None:
@@ -2524,7 +2574,7 @@ def symmetric_difference(self, other, result_name=None, sort=True):
25242574
right_diff = other.values.take(right_indexer)
25252575

25262576
the_diff = _concat._concat_compat([left_diff, right_diff])
2527-
if sort:
2577+
if sort is None:
25282578
try:
25292579
the_diff = sorting.safe_sort(the_diff)
25302580
except TypeError:

Diff for: pandas/core/indexes/datetimes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -602,19 +602,21 @@ def intersection(self, other, sort=False):
602602
Parameters
603603
----------
604604
other : DatetimeIndex or array-like
605-
sort : bool, default True
605+
sort : False or None, default False
606606
Sort the resulting index if possible.
607607
608608
.. versionadded:: 0.24.0
609609
610610
.. versionchanged:: 0.24.1
611611
612-
Changed the default from ``True`` to ``False``.
612+
Changed the default to ``False`` to match the behaviour
613+
from before 0.24.0.
613614
614615
Returns
615616
-------
616617
y : Index or DatetimeIndex
617618
"""
619+
self._validate_sort_keyword(sort)
618620
self._assert_can_do_setop(other)
619621

620622
if self.equals(other):

Diff for: pandas/core/indexes/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ def equals(self, other):
10931093
def overlaps(self, other):
10941094
return self._data.overlaps(other)
10951095

1096-
def _setop(op_name, sort=True):
1096+
def _setop(op_name, sort=None):
10971097
def func(self, other, sort=sort):
10981098
other = self._as_like_interval_index(other)
10991099

0 commit comments

Comments
 (0)