Skip to content

Commit 4cbd7c1

Browse files
committed
BUG: Try to sort result of Index.union rather than guessing sortability
closes pandas-dev#17376
1 parent 545d2de commit 4cbd7c1

File tree

4 files changed

+32
-71
lines changed

4 files changed

+32
-71
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -847,6 +847,7 @@ Other API Changes
847847
- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`)
848848
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
849849
- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`)
850+
- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
850851
- :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`)
851852
- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`)
852853
- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`)

pandas/core/indexes/base.py

+7-21
Original file line number | Diff line number | Diff line change
@@ -2749,35 +2749,21 @@ def union(self, other):
27492749
value_set = set(lvals)
27502750
result.extend([x for x in rvals if x not in value_set])
27512751
else:
2752-
indexer = self.get_indexer(other)
2753-
indexer, = (indexer == -1).nonzero()
2754-
2752+
indexer = np.where(self.get_indexer(other) == -1)[0]
27552753
if len(indexer) > 0:
27562754
other_diff = algos.take_nd(rvals, indexer,
27572755
allow_fill=False)
27582756
result = _concat._concat_compat((lvals, other_diff))
27592757

2760-
try:
2761-
lvals[0] < other_diff[0]
2762-
except TypeError as e:
2763-
warnings.warn("%s, sort order is undefined for "
2764-
"incomparable objects" % e, RuntimeWarning,
2765-
stacklevel=3)
2766-
else:
2767-
types = frozenset((self.inferred_type,
2768-
other.inferred_type))
2769-
if not types & _unsortable_types:
2770-
result.sort()
2771-
27722758
else:
27732759
result = lvals
27742760

2775-
try:
2776-
result = np.sort(result)
2777-
except TypeError as e:
2778-
warnings.warn("%s, sort order is undefined for "
2779-
"incomparable objects" % e, RuntimeWarning,
2780-
stacklevel=3)
2761+
try:
2762+
result = sorting.safe_sort(result)
2763+
except TypeError as e:
2764+
warnings.warn("%s, sort order is undefined for "
2765+
"incomparable objects" % e, RuntimeWarning,
2766+
stacklevel=3)
27812767

27822768
# for subclasses
27832769
return self._wrap_union_result(other, result)

pandas/tests/indexes/test_base.py

+20-34
Original file line number | Diff line number | Diff line change
@@ -821,8 +821,7 @@ def test_union(self):
821821
expected = Index(list('ab'), name='A')
822822
tm.assert_index_equal(union, expected)
823823

824-
with tm.assert_produces_warning(RuntimeWarning):
825-
firstCat = self.strIndex.union(self.dateIndex)
824+
firstCat = self.strIndex.union(self.dateIndex)
826825
secondCat = self.strIndex.union(self.strIndex)
827826

828827
if self.dateIndex.dtype == np.object_:
@@ -1518,28 +1517,25 @@ def test_drop_tuple(self, values, to_drop):
15181517
pytest.raises(KeyError, removed.drop, drop_me)
15191518

15201519
def test_tuple_union_bug(self):
1521-
import pandas
1522-
import numpy as np
1523-
15241520
aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
15251521
dtype=[('num', int), ('let', 'a1')])
15261522
aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'),
15271523
(2, 'B'), (1, 'C'), (2, 'C')],
15281524
dtype=[('num', int), ('let', 'a1')])
15291525

1530-
idx1 = pandas.Index(aidx1)
1531-
idx2 = pandas.Index(aidx2)
1526+
idx1 = Index(aidx1)
1527+
idx2 = Index(aidx2)
15321528

1533-
# intersection broken?
1529+
# intersection
15341530
int_idx = idx1.intersection(idx2)
1531+
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
15351532
# needs to be 1d like idx1 and idx2
1536-
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
15371533
assert int_idx.ndim == 1
15381534
tm.assert_index_equal(int_idx, expected)
15391535

1540-
# union broken
1536+
# GH 17376 (union)
15411537
union_idx = idx1.union(idx2)
1542-
expected = idx2
1538+
expected = idx2.sort_values()
15431539
assert union_idx.ndim == 1
15441540
tm.assert_index_equal(union_idx, expected)
15451541

@@ -1728,13 +1724,19 @@ def test_outer_join_sort(self):
17281724
left_idx = Index(np.random.permutation(15))
17291725
right_idx = tm.makeDateIndex(10)
17301726

1731-
with tm.assert_produces_warning(RuntimeWarning):
1727+
if PY3:
1728+
with tm.assert_produces_warning(RuntimeWarning):
1729+
joined = left_idx.join(right_idx, how='outer')
1730+
else:
17321731
joined = left_idx.join(right_idx, how='outer')
17331732

17341733
# right_idx in this case because DatetimeIndex has join precedence over
17351734
# Int64Index
1736-
with tm.assert_produces_warning(RuntimeWarning):
1737-
expected = right_idx.astype(object).union(left_idx.astype(object))
1735+
if PY3:
1736+
with tm.assert_produces_warning(RuntimeWarning):
1737+
expected = right_idx.astype(object).union(left_idx)
1738+
else:
1739+
expected = right_idx.astype(object).union(left_idx)
17381740
tm.assert_index_equal(joined, expected)
17391741

17401742
def test_nan_first_take_datetime(self):
@@ -2133,10 +2135,7 @@ def test_copy_name(self):
21332135
s1 = Series(2, index=first)
21342136
s2 = Series(3, index=second[:-1])
21352137

2136-
warning_type = RuntimeWarning if PY3 else None
2137-
with tm.assert_produces_warning(warning_type):
2138-
# Python 3: Unorderable types
2139-
s3 = s1 * s2
2138+
s3 = s1 * s2
21402139

21412140
assert s3.index.name == 'mario'
21422141

@@ -2169,27 +2168,14 @@ def test_union_base(self):
21692168
first = idx[3:]
21702169
second = idx[:5]
21712170

2172-
if PY3:
2173-
with tm.assert_produces_warning(RuntimeWarning):
2174-
# unorderable types
2175-
result = first.union(second)
2176-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2177-
tm.assert_index_equal(result, expected)
2178-
else:
2179-
result = first.union(second)
2180-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2181-
tm.assert_index_equal(result, expected)
2171+
expected = Index([0, 1, 2, 'a', 'b', 'c'])
2172+
result = first.union(second)
2173+
tm.assert_index_equal(result, expected)
21822174

21832175
# GH 10149
21842176
cases = [klass(second.values)
21852177
for klass in [np.array, Series, list]]
21862178
for case in cases:
2187-
if PY3:
2188-
with tm.assert_produces_warning(RuntimeWarning):
2189-
# unorderable types
2190-
result = first.union(case)
2191-
assert tm.equalContents(result, idx)
2192-
else:
21932179
result = first.union(case)
21942180
assert tm.equalContents(result, idx)
21952181

pandas/tests/series/test_operators.py

+4-16
Original file line number | Diff line number | Diff line change
@@ -1472,24 +1472,12 @@ def test_operators_bitwise(self):
14721472
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
14731473

14741474
# s_0123 will be all false now because of reindexing like s_tft
1475-
if compat.PY3:
1476-
# unable to sort incompatible object via .union.
1477-
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
1478-
with tm.assert_produces_warning(RuntimeWarning):
1479-
assert_series_equal(s_tft & s_0123, exp)
1480-
else:
1481-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1482-
assert_series_equal(s_tft & s_0123, exp)
1475+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1476+
assert_series_equal(s_tft & s_0123, exp)
14831477

14841478
# s_tft will be all false now because of reindexing like s_0123
1485-
if compat.PY3:
1486-
# unable to sort incompatible object via .union.
1487-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
1488-
with tm.assert_produces_warning(RuntimeWarning):
1489-
assert_series_equal(s_0123 & s_tft, exp)
1490-
else:
1491-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1492-
assert_series_equal(s_0123 & s_tft, exp)
1479+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1480+
assert_series_equal(s_0123 & s_tft, exp)
14931481

14941482
assert_series_equal(s_0123 & False, Series([False] * 4))
14951483
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))

0 commit comments

Comments
 (0)