Skip to content

Commit decaa47

Browse files
committed
BUG: Try to sort result of Index.union rather than guessing sortability
closes #17376
1 parent b45325e commit decaa47

File tree

4 files changed

+34
-77
lines changed

4 files changed

+34
-77
lines changed

Diff for: doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ Conversion
134134
Indexing
135135
^^^^^^^^
136136

137+
- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
137138
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
138139
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
139140
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)

Diff for: pandas/core/indexes/base.py

+7-21
Original file line numberDiff line numberDiff line change
@@ -2236,35 +2236,21 @@ def union(self, other):
22362236
value_set = set(self._values)
22372237
result.extend([x for x in other._values if x not in value_set])
22382238
else:
2239-
indexer = self.get_indexer(other)
2240-
indexer, = (indexer == -1).nonzero()
2241-
2239+
indexer = np.where(self.get_indexer(other) == -1)[0]
22422240
if len(indexer) > 0:
22432241
other_diff = algos.take_nd(other._values, indexer,
22442242
allow_fill=False)
22452243
result = _concat._concat_compat((self._values, other_diff))
22462244

2247-
try:
2248-
self._values[0] < other_diff[0]
2249-
except TypeError as e:
2250-
warnings.warn("%s, sort order is undefined for "
2251-
"incomparable objects" % e, RuntimeWarning,
2252-
stacklevel=3)
2253-
else:
2254-
types = frozenset((self.inferred_type,
2255-
other.inferred_type))
2256-
if not types & _unsortable_types:
2257-
result.sort()
2258-
22592245
else:
22602246
result = self._values
22612247

2262-
try:
2263-
result = np.sort(result)
2264-
except TypeError as e:
2265-
warnings.warn("%s, sort order is undefined for "
2266-
"incomparable objects" % e, RuntimeWarning,
2267-
stacklevel=3)
2248+
try:
2249+
result = sorting.safe_sort(result)
2250+
except TypeError as e:
2251+
warnings.warn("%s, sort order is undefined for "
2252+
"incomparable objects" % e, RuntimeWarning,
2253+
stacklevel=3)
22682254

22692255
# for subclasses
22702256
return self._wrap_union_result(other, result)

Diff for: pandas/tests/indexes/test_base.py

+21-35
Original file line numberDiff line numberDiff line change
@@ -751,8 +751,7 @@ def test_union(self):
751751
expected = Index(list('ab'), name='A')
752752
tm.assert_index_equal(union, expected)
753753

754-
with tm.assert_produces_warning(RuntimeWarning):
755-
firstCat = self.strIndex.union(self.dateIndex)
754+
firstCat = self.strIndex.union(self.dateIndex)
756755
secondCat = self.strIndex.union(self.strIndex)
757756

758757
if self.dateIndex.dtype == np.object_:
@@ -1341,29 +1340,26 @@ def test_drop(self):
13411340
expected = Index([1, 2])
13421341
tm.assert_index_equal(dropped, expected)
13431342

1344-
def test_tuple_union_bug(self):
1345-
import pandas
1346-
import numpy as np
1347-
1343+
def test_tuples_intersection_union(self):
13481344
aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
13491345
dtype=[('num', int), ('let', 'a1')])
13501346
aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'),
13511347
(2, 'B'), (1, 'C'), (2, 'C')],
13521348
dtype=[('num', int), ('let', 'a1')])
13531349

1354-
idx1 = pandas.Index(aidx1)
1355-
idx2 = pandas.Index(aidx2)
1350+
idx1 = Index(aidx1)
1351+
idx2 = Index(aidx2)
13561352

1357-
# intersection broken?
1353+
# intersection
13581354
int_idx = idx1.intersection(idx2)
1355+
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
13591356
# needs to be 1d like idx1 and idx2
1360-
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
13611357
assert int_idx.ndim == 1
13621358
tm.assert_index_equal(int_idx, expected)
13631359

1364-
# union broken
1360+
# GH 17376 (union)
13651361
union_idx = idx1.union(idx2)
1366-
expected = idx2
1362+
expected = idx2.sort_values()
13671363
assert union_idx.ndim == 1
13681364
tm.assert_index_equal(union_idx, expected)
13691365

@@ -1553,13 +1549,19 @@ def test_outer_join_sort(self):
15531549
left_idx = Index(np.random.permutation(15))
15541550
right_idx = tm.makeDateIndex(10)
15551551

1556-
with tm.assert_produces_warning(RuntimeWarning):
1552+
if PY3:
1553+
with tm.assert_produces_warning(RuntimeWarning):
1554+
joined = left_idx.join(right_idx, how='outer')
1555+
else:
15571556
joined = left_idx.join(right_idx, how='outer')
15581557

15591558
# right_idx in this case because DatetimeIndex has join precedence over
15601559
# Int64Index
1561-
with tm.assert_produces_warning(RuntimeWarning):
1562-
expected = right_idx.astype(object).union(left_idx.astype(object))
1560+
if PY3:
1561+
with tm.assert_produces_warning(RuntimeWarning):
1562+
expected = right_idx.astype(object).union(left_idx)
1563+
else:
1564+
expected = right_idx.astype(object).union(left_idx)
15631565
tm.assert_index_equal(joined, expected)
15641566

15651567
def test_nan_first_take_datetime(self):
@@ -1943,10 +1945,7 @@ def test_copy_name(self):
19431945
s1 = Series(2, index=first)
19441946
s2 = Series(3, index=second[:-1])
19451947

1946-
warning_type = RuntimeWarning if PY3 else None
1947-
with tm.assert_produces_warning(warning_type):
1948-
# Python 3: Unorderable types
1949-
s3 = s1 * s2
1948+
s3 = s1 * s2
19501949

19511950
assert s3.index.name == 'mario'
19521951

@@ -1979,27 +1978,14 @@ def test_union_base(self):
19791978
first = idx[3:]
19801979
second = idx[:5]
19811980

1982-
if PY3:
1983-
with tm.assert_produces_warning(RuntimeWarning):
1984-
# unorderable types
1985-
result = first.union(second)
1986-
expected = Index(['b', 2, 'c', 0, 'a', 1])
1987-
tm.assert_index_equal(result, expected)
1988-
else:
1989-
result = first.union(second)
1990-
expected = Index(['b', 2, 'c', 0, 'a', 1])
1991-
tm.assert_index_equal(result, expected)
1981+
expected = Index([0, 1, 2, 'a', 'b', 'c'])
1982+
result = first.union(second)
1983+
tm.assert_index_equal(result, expected)
19921984

19931985
# GH 10149
19941986
cases = [klass(second.values)
19951987
for klass in [np.array, Series, list]]
19961988
for case in cases:
1997-
if PY3:
1998-
with tm.assert_produces_warning(RuntimeWarning):
1999-
# unorderable types
2000-
result = first.union(case)
2001-
assert tm.equalContents(result, idx)
2002-
else:
20031989
result = first.union(case)
20041990
assert tm.equalContents(result, idx)
20051991

Diff for: pandas/tests/series/test_operators.py

+5-21
Original file line numberDiff line numberDiff line change
@@ -1198,11 +1198,7 @@ def test_comparison_label_based(self):
11981198
assert_series_equal(result, a[a])
11991199

12001200
for e in [Series(['z'])]:
1201-
if compat.PY3:
1202-
with tm.assert_produces_warning(RuntimeWarning):
1203-
result = a[a | e]
1204-
else:
1205-
result = a[a | e]
1201+
result = a[a | e]
12061202
assert_series_equal(result, a[a])
12071203

12081204
# vs scalars
@@ -1394,24 +1390,12 @@ def test_operators_bitwise(self):
13941390
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
13951391

13961392
# s_0123 will be all false now because of reindexing like s_tft
1397-
if compat.PY3:
1398-
# unable to sort incompatible object via .union.
1399-
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
1400-
with tm.assert_produces_warning(RuntimeWarning):
1401-
assert_series_equal(s_tft & s_0123, exp)
1402-
else:
1403-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1404-
assert_series_equal(s_tft & s_0123, exp)
1393+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1394+
assert_series_equal(s_tft & s_0123, exp)
14051395

14061396
# s_tft will be all false now because of reindexing like s_0123
1407-
if compat.PY3:
1408-
# unable to sort incompatible object via .union.
1409-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
1410-
with tm.assert_produces_warning(RuntimeWarning):
1411-
assert_series_equal(s_0123 & s_tft, exp)
1412-
else:
1413-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1414-
assert_series_equal(s_0123 & s_tft, exp)
1397+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1398+
assert_series_equal(s_0123 & s_tft, exp)
14151399

14161400
assert_series_equal(s_0123 & False, Series([False] * 4))
14171401
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))

0 commit comments

Comments
 (0)