Skip to content

Commit 033041d

Browse files
Backport PR #42334: PERF/REGR: restore IntervalIndex._intersection_non_unique (#42336)
Co-authored-by: jbrockmendel <[email protected]>
1 parent 2c26ee7 commit 033041d

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

Diff for: pandas/core/indexes/interval.py

+31-1
Original file line number | Diff line number | Diff line change
@@ -808,7 +808,8 @@ def _intersection(self, other, sort):
808808
# multiple NaNs
809809
taken = other._intersection_unique(self)
810810
else:
811-
return super()._intersection(other, sort)
811+
# duplicates
812+
taken = self._intersection_non_unique(other)
812813

813814
if sort is None:
814815
taken = taken.sort_values()
@@ -837,6 +838,35 @@ def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
837838

838839
return self.take(indexer)
839840

841+
def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
    """
    Return the intersection with another IntervalIndex.

    Used when the IntervalIndex does have some common endpoints,
    on either side.

    Parameters
    ----------
    other : IntervalIndex

    Returns
    -------
    IntervalIndex
        The entries of ``self`` (repeats included, original order kept)
        whose ``(left, right)`` pair also occurs in ``other``. When both
        indexes contain missing values, the first missing entry of
        ``self`` is selected as well.
    """
    # Note: this is about 3.25x faster than super()._intersection(other)
    #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)

    # Build the lookup set once so each membership test below is O(1).
    other_tups = set(zip(other.left, other.right))
    mask = np.array(
        [tup in other_tups for tup in zip(self.left, self.right)],
        dtype=bool,
    )

    if self.hasnans and other.hasnans:
        # Explicitly keep the first missing entry of ``self``; ``argmax`` on
        #  the boolean ``isna`` mask gives the position of its first True,
        #  which exists because ``self.hasnans`` was just checked.
        mask[self.isna().argmax()] = True

    return self[mask]
869+
840870
# --------------------------------------------------------------------
841871

842872
@property

0 commit comments

Comments
 (0)