Skip to content

Commit cdc638b

Browse files
committed
Add optional sort parameter to difference method in subclasses
1 parent 39715d6 commit cdc638b

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

Diff for: doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ Other Enhancements
194194
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
195195
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
196196
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
197+
- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`).
197198

198199
.. _whatsnew_0240.api_breaking:
199200

Diff for: pandas/core/indexes/multi.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -2764,10 +2764,18 @@ def intersection(self, other):
27642764
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
27652765
names=result_names)
27662766

2767-
def difference(self, other):
2767+
def difference(self, other, sort=True):
27682768
"""
27692769
Compute set difference of two MultiIndex objects (sorted by default)
27702770
2771+
Parameters
2772+
----------
2773+
other : MultiIndex
2774+
sort : bool, default True
2775+
Sort the resulting MultiIndex if possible.
2776+
2777+
.. versionadded:: 0.24.0
2778+
27712779
Returns
27722780
-------
27732781
diff : MultiIndex
@@ -2783,8 +2791,10 @@ def difference(self, other):
27832791
labels=[[]] * self.nlevels,
27842792
names=result_names, verify_integrity=False)
27852793

2786-
difference = sorted(set(self._ndarray_values) -
2787-
set(other._ndarray_values))
2794+
difference = set(self._ndarray_values) - set(other._ndarray_values)
2795+
2796+
if sort:
2797+
difference = sorted(difference)
27882798

27892799
if len(difference) == 0:
27902800
return MultiIndex(levels=[[]] * self.nlevels,

Diff for: pandas/tests/indexes/test_base.py

+20-10
Original file line numberDiff line numberDiff line change
@@ -1047,15 +1047,17 @@ def test_iadd_string(self):
10471047

10481048
@pytest.mark.parametrize("second_name,expected", [
10491049
(None, None), ('name', 'name')])
1050-
def test_difference_name_preservation(self, second_name, expected):
1050+
@pytest.mark.parametrize("sort", [
1051+
(True, False)])
1052+
def test_difference_name_preservation(self, second_name, expected, sort):
10511053
# TODO: replace with fixtures
10521054
first = self.strIndex[5:20]
10531055
second = self.strIndex[:10]
10541056
answer = self.strIndex[10:20]
10551057

10561058
first.name = 'name'
10571059
second.name = second_name
1058-
result = first.difference(second)
1060+
result = first.difference(second, sort)
10591061

10601062
assert tm.equalContents(result, answer)
10611063

@@ -1064,18 +1066,22 @@ def test_difference_name_preservation(self, second_name, expected):
10641066
else:
10651067
assert result.name == expected
10661068

1067-
def test_difference_empty_arg(self):
1069+
@pytest.mark.parametrize("sort", [
1070+
(True, False)])
1071+
def test_difference_empty_arg(self, sort):
10681072
first = self.strIndex[5:20]
10691073
first.name == 'name'
1070-
result = first.difference([])
1074+
result = first.difference([], sort=sort)
10711075

10721076
assert tm.equalContents(result, first)
10731077
assert result.name == first.name
10741078

1075-
def test_difference_identity(self):
1079+
@pytest.mark.parametrize("sort", [
1080+
(True, False)])
1081+
def test_difference_identity(self, sort):
10761082
first = self.strIndex[5:20]
10771083
first.name == 'name'
1078-
result = first.difference(first)
1084+
result = first.difference(first, sort)
10791085

10801086
assert len(result) == 0
10811087
assert result.name == first.name
@@ -1124,13 +1130,15 @@ def test_symmetric_difference_non_index(self):
11241130
assert tm.equalContents(result, expected)
11251131
assert result.name == 'new_name'
11261132

1127-
def test_difference_type(self):
1133+
@pytest.mark.parametrize("sort", [
1134+
(True, False)])
1135+
def test_difference_type(self, sort):
11281136
# GH 20040
11291137
# If taking difference of a set and itself, it
11301138
# needs to preserve the type of the index
11311139
skip_index_keys = ['repeats']
11321140
for key, index in self.generate_index_types(skip_index_keys):
1133-
result = index.difference(index)
1141+
result = index.difference(index, sort)
11341142
expected = index.drop(index)
11351143
tm.assert_index_equal(result, expected)
11361144

@@ -2344,13 +2352,15 @@ def test_intersection_different_type_base(self, klass):
23442352
result = first.intersection(klass(second.values))
23452353
assert tm.equalContents(result, second)
23462354

2347-
def test_difference_base(self):
2355+
@pytest.mark.parametrize("sort", [
2356+
(True, False)])
2357+
def test_difference_base(self, sort):
23482358
# (same results for py2 and py3 but sortedness not tested elsewhere)
23492359
index = self.create_index()
23502360
first = index[:4]
23512361
second = index[3:]
23522362

2353-
result = first.difference(second)
2363+
result = first.difference(second, sort)
23542364
expected = Index([0, 1, 'a'])
23552365
tm.assert_index_equal(result, expected)
23562366

0 commit comments

Comments
 (0)