From cffb8631503dbb588da10a8d4908c8affcbdfb02 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Mon, 12 May 2025 00:35:44 +0530 Subject: [PATCH 01/13] sortedarray in side multi.py implemented, testing pending --- .gitignore | 2 ++ pandas/core/indexes/multi.py | 57 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/.gitignore b/.gitignore index d951f3fb9cbad..6cef619a6c741 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,5 @@ doc/source/savefig/ # Pyodide/WASM related files # ############################## /.pyodide-xbuildenv-* + +*.ipynb \ No newline at end of file diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 29b34f560ab2e..ba56ee22f8e31 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3778,6 +3778,60 @@ def _reorder_indexer( ind = np.lexsort(keys) return indexer[ind] + + def searchsorted( + self, + value: tuple[Hashable, ...], + side: Literal["left", "right"] = "left", + sorter: npt.NDArray[np.intp] | None = None, + ) -> npt.NDArray[np.intp]: + """ + Find the indices where elements should be inserted to maintain order. + + Parameters + ---------- + value : tuple + The value(s) to search for in the MultiIndex. + side : {'left', 'right'}, default 'left' + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. Note that if `value` is + already present in the MultiIndex, the results will be different. + sorter : 1-D array-like, optional + Optional array of integer indices that sort the MultiIndex. + + Returns + ------- + numpy.ndarray + Array of insertion points. + + See Also + -------- + Index.searchsorted : Search for insertion point in a 1-D index. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([["a", "b", "c"], ["x", "y", "z"]]) + >>> mi.searchsorted(("b", "y")) + 1 + """ + if isinstance(value, tuple): + value = list(value) + + if side not in ["left", "right"]: + raise ValueError("side must be either 'left' or 'right'") + + if not value: + raise ValueError("searchsorted requires a non-empty value") + + + + dtype = np.dtype([(f"level_{i}", level.dtype) for i,level in enumerate(self.levels)]) + + val = np.asarray(value, dtype=dtype) + + return np.searchsorted(self.values.astype(dtype),val, side=side, sorter=sorter) + + def truncate(self, before=None, after=None) -> MultiIndex: """ Slice index between two labels / tuples, return new MultiIndex. @@ -4337,3 +4391,6 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: ) for i, x in enumerate(X) ] + + + From 1ba7ff88cc0cbbd849f9e0e9f4b1b981534ff6ec Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Mon, 12 May 2025 22:43:12 +0530 Subject: [PATCH 02/13] implemented the searchsorted() method, w.r.t issue #18433 --- pandas/core/indexes/multi.py | 33 +++++++++++++----- pandas/tests/indexes/multi/test_indexing.py | 37 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ba56ee22f8e31..c86eef2719f27 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3814,22 +3814,37 @@ def searchsorted( >>> mi.searchsorted(("b", "y")) 1 """ - if isinstance(value, tuple): - value = list(value) + if not isinstance(value, (tuple,list)): + raise TypeError("value must be a tuple or list") + if isinstance(value, tuple): + value = [value] if side not in ["left", "right"]: raise ValueError("side must be either 'left' or 'right'") if not value: raise ValueError("searchsorted requires a non-empty value") - - - dtype = np.dtype([(f"level_{i}", level.dtype) for i,level in enumerate(self.levels)]) - - val = np.asarray(value, dtype=dtype) - - return np.searchsorted(self.values.astype(dtype),val, side=side, sorter=sorter) + try: + + indexer = self.get_indexer(value) + result = [] + + for v, i in zip(value, indexer): + if i!= -1: + result.append(i if side == "left" else i + 1) + else: + dtype = np.dtype([(f"level_{i}", level.dtype) for i, level in enumerate(self.levels)]) + + val_array = np.array(value, dtype=dtype) + + pos = np.searchsorted( np.asarray(self.values,dtype=dtype),val_array , side=side, sorter = sorter) + result.append(pos) + + return np.array(result, dtype=np.intp) + + except KeyError: + pass def truncate(self, before=None, after=None) -> MultiIndex: diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index f098690be2afa..a9fd402058270 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1029,3 +1029,40 @@ def test_get_loc_namedtuple_behaves_like_tuple(): assert idx.get_loc(("i1", "i2")) == 0 assert idx.get_loc(("i3", "i4")) == 1 assert idx.get_loc(("i5", "i6")) == 2 + + + + +def test_searchsorted(): + mi = MultiIndex.from_tuples([ + ('a', 0), + ('a', 1), + ('b', 0), + ('b', 1), + ('c', 0) + ]) + + + assert mi.searchsorted(('b', 0)) == 2 + assert mi.searchsorted(('b', 0), side="right") == 3 + + assert mi.searchsorted(('a', 0)) == 0 + assert mi.searchsorted(('a', -1)) == 0 + assert mi.searchsorted(('c', 1)) == 5 # Beyond the last + + + result = mi.searchsorted([('a', 1), ('b', 0), ('c', 0)]) + expected = np.array([1, 2, 4], dtype=np.intp) + np.testing.assert_array_equal(result, expected) + + + result = mi.searchsorted([('a', 1), ('b', 0), ('c', 0)], side='right') + expected = np.array([2, 3, 5], dtype=np.intp) + np.testing.assert_array_equal(result, expected) + + + with pytest.raises(ValueError, match="side must be either 'left' or 'right'"): + mi.searchsorted(('a', 1), side='middle') + + with pytest.raises(TypeError, match="value must be a tuple or list"): + mi.searchsorted('a') # not a tuple From ac70f3e6a87362f6ac7e07db5b378c6e400f38e2 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Mon, 12 May 2025 23:01:38 +0530 Subject: [PATCH 03/13] modified test_searchsorted, discarded the use of numpy.testing --- .gitignore | 2 +- pandas/core/indexes/multi.py | 36 +++++++++-------- pandas/tests/indexes/multi/test_indexing.py | 43 ++++++++------------- 3 files changed, 37 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 6cef619a6c741..5ab3710e0c8f9 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,4 @@ doc/source/savefig/ ############################## /.pyodide-xbuildenv-* -*.ipynb \ No newline at end of file +*.ipynb diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c86eef2719f27..ae37188ffbcc5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3778,7 +3778,6 @@ def _reorder_indexer( ind = np.lexsort(keys) return indexer[ind] - def searchsorted( self, value: tuple[Hashable, ...], @@ -3814,39 +3813,47 @@ def searchsorted( >>> mi.searchsorted(("b", "y")) 1 """ - if not isinstance(value, (tuple,list)): + if not isinstance(value, (tuple, list)): raise TypeError("value must be a tuple or list") if isinstance(value, tuple): value = [value] if side not in ["left", "right"]: raise ValueError("side must be either 'left' or 'right'") - + if not value: raise ValueError("searchsorted requires a non-empty value") - - try: - + + try: indexer = self.get_indexer(value) result = [] for v, i in zip(value, indexer): - if i!= -1: + if i != -1: result.append(i if side == "left" else i + 1) - else: - dtype = np.dtype([(f"level_{i}", level.dtype) for i, level in enumerate(self.levels)]) + else: + dtype = np.dtype( + [ + (f"level_{i}", level.dtype) + for i, level in enumerate(self.levels) + ] + ) val_array = np.array(value, dtype=dtype) - - pos = np.searchsorted( np.asarray(self.values,dtype=dtype),val_array , side=side, sorter = sorter) + + pos = np.searchsorted( + np.asarray(self.values, dtype=dtype), + val_array, + side=side, + sorter=sorter, + ) result.append(pos) - + return np.array(result, dtype=np.intp) except KeyError: pass - def truncate(self, before=None, after=None) -> MultiIndex: """ Slice index between two labels / tuples, return new MultiIndex. @@ -4406,6 +4413,3 @@ def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]: ) for i, x in enumerate(X) ] - - - diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index a9fd402058270..69695655c5a6a 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1031,38 +1031,27 @@ def test_get_loc_namedtuple_behaves_like_tuple(): assert idx.get_loc(("i5", "i6")) == 2 +def test_searchsorted(): + mi = MultiIndex.from_tuples([("a", 0), ("a", 1), + ("b", 0), ("b", 1), ("c", 0)]) + assert mi.searchsorted(("b", 0)) == 2 + assert mi.searchsorted(("b", 0), side="right") == 3 -def test_searchsorted(): - mi = MultiIndex.from_tuples([ - ('a', 0), - ('a', 1), - ('b', 0), - ('b', 1), - ('c', 0) - ]) - - - assert mi.searchsorted(('b', 0)) == 2 - assert mi.searchsorted(('b', 0), side="right") == 3 - - assert mi.searchsorted(('a', 0)) == 0 - assert mi.searchsorted(('a', -1)) == 0 - assert mi.searchsorted(('c', 1)) == 5 # Beyond the last - - - result = mi.searchsorted([('a', 1), ('b', 0), ('c', 0)]) + assert mi.searchsorted(("a", 0)) == 0 + assert mi.searchsorted(("a", -1)) == 0 + assert mi.searchsorted(("c", 1)) == 5 # Beyond the last + + result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)]) expected = np.array([1, 2, 4], dtype=np.intp) - np.testing.assert_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) - - result = mi.searchsorted([('a', 1), ('b', 0), ('c', 0)], side='right') + result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)], side="right") expected = np.array([2, 3, 5], dtype=np.intp) - np.testing.assert_array_equal(result, expected) + tm.assert_numpy_array_equal(result, expected) - with pytest.raises(ValueError, match="side must be either 'left' or 'right'"): - mi.searchsorted(('a', 1), side='middle') - + mi.searchsorted(("a", 1), side="middle") + with pytest.raises(TypeError, match="value must be a tuple or list"): - mi.searchsorted('a') # not a tuple + mi.searchsorted("a") # not a tuple From 275b0e2e4f49a3e5135b51056251bebb7362ebb0 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Mon, 12 May 2025 23:04:40 +0530 Subject: [PATCH 04/13] applying pre-commit fixes --- pandas/tests/indexes/multi/test_indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 69695655c5a6a..91bdc3dcdb4d7 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1032,8 +1032,7 @@ def test_get_loc_namedtuple_behaves_like_tuple(): def test_searchsorted(): - mi = MultiIndex.from_tuples([("a", 0), ("a", 1), - ("b", 0), ("b", 1), ("c", 0)]) + mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0)]) assert mi.searchsorted(("b", 0)) == 2 assert mi.searchsorted(("b", 0), side="right") == 3 From 0e0b9b516ccb704a7ff1dc4f4ed55d2bb40510ee Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Wed, 14 May 2025 00:27:42 +0530 Subject: [PATCH 05/13] fixed the returned statement error --- pandas/core/indexes/multi.py | 51 ++++++++++----------- pandas/tests/indexes/multi/test_indexing.py | 13 +++--- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ae37188ffbcc5..9500b5623e795 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3780,7 +3780,7 @@ def _reorder_indexer( def searchsorted( self, - value: tuple[Hashable, ...], + value: Union[Tuple[Hashable, ...], Sequence[Tuple[Hashable, ...]]], side: Literal["left", "right"] = "left", sorter: npt.NDArray[np.intp] | None = None, ) -> npt.NDArray[np.intp]: @@ -3817,42 +3817,39 @@ def searchsorted( raise TypeError("value must be a tuple or list") if isinstance(value, tuple): - value = [value] + values = [value] if side not in ["left", "right"]: raise ValueError("side must be either 'left' or 'right'") if not value: raise ValueError("searchsorted requires a non-empty value") - try: - indexer = self.get_indexer(value) - result = [] + + indexer = self.get_indexer(value) + result = [] - for v, i in zip(value, indexer): - if i != -1: - result.append(i if side == "left" else i + 1) - else: - dtype = np.dtype( - [ - (f"level_{i}", level.dtype) - for i, level in enumerate(self.levels) - ] - ) - - val_array = np.array(value, dtype=dtype) + for v, i in zip(value, indexer): + if i != -1: + result.append(i if side == "left" else i + 1) + else: + dtype = np.dtype( + [ + (f"level_{i}", level.dtype) + for i, level in enumerate(self.levels) + ] + ) - pos = np.searchsorted( - np.asarray(self.values, dtype=dtype), - val_array, - side=side, - sorter=sorter, - ) - result.append(pos) + val_array = np.array(values, dtype=dtype) - return np.array(result, dtype=np.intp) + pos = np.searchsorted( + np.asarray(self.values, dtype=dtype), + val_array, + side=side, + sorter=sorter, + ) + result.append(pos) - except KeyError: - pass + return np.array(result, dtype=np.intp) def truncate(self, before=None, after=None) -> MultiIndex: """ diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 91bdc3dcdb4d7..e6e31afd63303 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1032,14 +1032,15 @@ def test_get_loc_namedtuple_behaves_like_tuple(): def test_searchsorted(): - mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0)]) + mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), + ("b", 1), ("c", 0)]) - assert mi.searchsorted(("b", 0)) == 2 - assert mi.searchsorted(("b", 0), side="right") == 3 + assert np.all(mi.searchsorted(("b", 0)) == 2) + assert np.all(mi.searchsorted(("b", 0), side="right") == 3) - assert mi.searchsorted(("a", 0)) == 0 - assert mi.searchsorted(("a", -1)) == 0 - assert mi.searchsorted(("c", 1)) == 5 # Beyond the last + assert np.all(mi.searchsorted(("a", 0)) == 0) + assert np.all(mi.searchsorted(("a", -1)) == 0) + assert np.all(mi.searchsorted(("c", 1)) == 5) # Beyond the last result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)]) expected = np.array([1, 2, 4], dtype=np.intp) From 47476096c47d8c794e254bfe4a77f2ea53f7e029 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Wed, 14 May 2025 23:57:53 +0530 Subject: [PATCH 06/13] solved the mypy type checking error, and implemented searchsorted under MultiIndex() --- pandas/core/indexes/multi.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9500b5623e795..1977110e688e3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3780,7 +3780,7 @@ def _reorder_indexer( def searchsorted( self, - value: Union[Tuple[Hashable, ...], Sequence[Tuple[Hashable, ...]]], + value: Any, side: Literal["left", "right"] = "left", sorter: npt.NDArray[np.intp] | None = None, ) -> npt.NDArray[np.intp]: @@ -3789,7 +3789,7 @@ def searchsorted( Parameters ---------- - value : tuple + value : Any The value(s) to search for in the MultiIndex. side : {'left', 'right'}, default 'left' If 'left', the index of the first suitable location found is given. @@ -3800,7 +3800,7 @@ def searchsorted( Returns ------- - numpy.ndarray + npt.NDArray[np.intp] Array of insertion points. See Also @@ -3813,18 +3813,19 @@ def searchsorted( >>> mi.searchsorted(("b", "y")) 1 """ + + if not value: + raise ValueError("searchsorted requires a non-empty value") + if not isinstance(value, (tuple, list)): raise TypeError("value must be a tuple or list") if isinstance(value, tuple): - values = [value] + value = [value] + if side not in ["left", "right"]: raise ValueError("side must be either 'left' or 'right'") - if not value: - raise ValueError("searchsorted requires a non-empty value") - - indexer = self.get_indexer(value) result = [] @@ -3834,12 +3835,12 @@ def searchsorted( else: dtype = np.dtype( [ - (f"level_{i}", level.dtype) + (f"level_{i}", np.asarray(level).dtype) for i, level in enumerate(self.levels) ] ) - - val_array = np.array(values, dtype=dtype) + + val_array = np.array([v], dtype=dtype) pos = np.searchsorted( np.asarray(self.values, dtype=dtype), @@ -3847,7 +3848,7 @@ def searchsorted( side=side, sorter=sorter, ) - result.append(pos) + result.append(int(pos[0])) return np.array(result, dtype=np.intp) From e2c2c5e130c6ab9146b24e24e102dc1875db391b Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Sat, 17 May 2025 09:56:27 +0530 Subject: [PATCH 07/13] Closes the GH 14833 issue --- pandas/core/indexes/multi.py | 30 +++++++++++++++++++-- pandas/tests/base/test_misc.py | 18 ++++++------- pandas/tests/indexes/multi/test_indexing.py | 9 +++---- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1977110e688e3..2edc284a848d8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -16,6 +16,7 @@ Any, Literal, cast, + overload, ) import warnings @@ -44,6 +45,14 @@ Shape, npt, ) +if TYPE_CHECKING: + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ScalarLike_co, + ) + + from pandas.compat.numpy import function as nv from pandas.errors import ( InvalidIndexError, @@ -3778,9 +3787,25 @@ def _reorder_indexer( ind = np.lexsort(keys) return indexer[ind] + @overload + def searchsorted( + self, + value: ScalarLike_co, + side: Literal["left", "right"] = ..., + sirter:NumpySorter = ..., + ) -> np.intp:... + + @overload + def searchsorted( + self, + value: npt.ArrayLike | ExtensionArray, + side: Literal["left", "right"] = ..., + sorter: NumpySorter = ..., + ) -> npt.NDArray[np.intp]:... + def searchsorted( self, - value: Any, + value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: npt.NDArray[np.intp] | None = None, ) -> npt.NDArray[np.intp]: @@ -3831,6 +3856,7 @@ def searchsorted( for v, i in zip(value, indexer): if i != -1: + result.append(i if side == "left" else i + 1) else: dtype = np.dtype( @@ -3839,7 +3865,7 @@ def searchsorted( for i, level in enumerate(self.levels) ] ) - + val_array = np.array([v], dtype=dtype) pos = np.searchsorted( diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 7819b7b75f065..b8074e1259ef0 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -141,20 +141,20 @@ def test_memory_usage_components_narrow_series(any_real_numpy_dtype): index_usage = series.index.memory_usage() assert total_usage == non_index_usage + index_usage - + def test_searchsorted(request, index_or_series_obj): # numpy.searchsorted calls obj.searchsorted under the hood. # See gh-12238 obj = index_or_series_obj - if isinstance(obj, pd.MultiIndex): - # See gh-14833 - request.applymarker( - pytest.mark.xfail( - reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" - ) - ) - elif obj.dtype.kind == "c" and isinstance(obj, Index): + # if isinstance(obj, pd.MultiIndex): + # # See gh-14833 + # request.applymarker( + # pytest.mark.xfail( + # reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" + # ) + # ) + if obj.dtype.kind == "c" and isinstance(obj, Index): # TODO: Should Series cases also raise? Looks like they use numpy # comparison semantics https://github.com/numpy/numpy/issues/15981 mark = pytest.mark.xfail(reason="complex objects are not comparable") diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index e6e31afd63303..672a733ea4d57 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1032,15 +1032,14 @@ def test_get_loc_namedtuple_behaves_like_tuple(): def test_searchsorted(): - mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), - ("b", 1), ("c", 0)]) + # GH14833 + mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0)]) assert np.all(mi.searchsorted(("b", 0)) == 2) assert np.all(mi.searchsorted(("b", 0), side="right") == 3) - assert np.all(mi.searchsorted(("a", 0)) == 0) assert np.all(mi.searchsorted(("a", -1)) == 0) - assert np.all(mi.searchsorted(("c", 1)) == 5) # Beyond the last + assert np.all(mi.searchsorted(("c", 1)) == 5) result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)]) expected = np.array([1, 2, 4], dtype=np.intp) @@ -1054,4 +1053,4 @@ def test_searchsorted(): mi.searchsorted(("a", 1), side="middle") with pytest.raises(TypeError, match="value must be a tuple or list"): - mi.searchsorted("a") # not a tuple + mi.searchsorted("a") From e88da57ae9ae47816f229a2d3d828d201fcfc03c Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Sat, 17 May 2025 11:40:57 +0530 Subject: [PATCH 08/13] pre-commit run successful, closes issue 14833 --- pandas/core/indexes/multi.py | 10 +++++----- pandas/tests/base/test_misc.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2edc284a848d8..d4ed817e4e242 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -45,6 +45,7 @@ Shape, npt, ) + if TYPE_CHECKING: from pandas._typing import ( NumpySorter, @@ -3792,8 +3793,8 @@ def searchsorted( self, value: ScalarLike_co, side: Literal["left", "right"] = ..., - sirter:NumpySorter = ..., - ) -> np.intp:... + sorter: NumpySorter = ..., + ) -> np.intp: ... @overload def searchsorted( @@ -3801,14 +3802,14 @@ def searchsorted( value: npt.ArrayLike | ExtensionArray, side: Literal["left", "right"] = ..., sorter: NumpySorter = ..., - ) -> npt.NDArray[np.intp]:... + ) -> npt.NDArray[np.intp]: ... def searchsorted( self, value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: npt.NDArray[np.intp] | None = None, - ) -> npt.NDArray[np.intp]: + ) -> np.intp | npt.NDArray[np.intp]: """ Find the indices where elements should be inserted to maintain order. @@ -3856,7 +3857,6 @@ def searchsorted( for v, i in zip(value, indexer): if i != -1: - result.append(i if side == "left" else i + 1) else: dtype = np.dtype( diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index b8074e1259ef0..e8c57e5db340f 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -141,7 +141,7 @@ def test_memory_usage_components_narrow_series(any_real_numpy_dtype): index_usage = series.index.memory_usage() assert total_usage == non_index_usage + index_usage - + def test_searchsorted(request, index_or_series_obj): # numpy.searchsorted calls obj.searchsorted under the hood. # See gh-12238 From 94f7c44acecc485a6c0d2458737485bfafa6cb92 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Sat, 17 May 2025 12:18:44 +0530 Subject: [PATCH 09/13] fixed incompatible return type issue, and closes issue 14833 --- pandas/core/indexes/multi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d4ed817e4e242..362defa788928 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3857,7 +3857,8 @@ def searchsorted( for v, i in zip(value, indexer): if i != -1: - result.append(i if side == "left" else i + 1) + val = i if side == "left" else i + 1 + result.append(np.intp(val)) else: dtype = np.dtype( [ @@ -3874,7 +3875,7 @@ def searchsorted( side=side, sorter=sorter, ) - result.append(int(pos[0])) + result.append(np.intp(pos[0])) return np.array(result, dtype=np.intp) From 1f4a1c9576a4b6acfb9e219402adbde5f8e917b6 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Sat, 17 May 2025 22:14:47 +0530 Subject: [PATCH 10/13] fixed typing checks; closes issue 14833 --- pandas/core/indexes/multi.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 362defa788928..7032979a51b3a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3789,7 +3789,7 @@ def _reorder_indexer( return indexer[ind] @overload - def searchsorted( + def searchsorted( # type: ignore[overload-overlap] self, value: ScalarLike_co, side: Literal["left", "right"] = ..., @@ -3809,7 +3809,7 @@ def searchsorted( value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: npt.NDArray[np.intp] | None = None, - ) -> np.intp | npt.NDArray[np.intp]: + ) -> npt.NDArray[np.intp] | np.intp: """ Find the indices where elements should be inserted to maintain order. @@ -3826,8 +3826,9 @@ def searchsorted( Returns ------- - npt.NDArray[np.intp] - Array of insertion points. + npt.NDArray[np.intp] or np.intp + The index or indices where the value(s) should be inserted to + maintain order. See Also -------- @@ -3837,7 +3838,7 @@ def searchsorted( -------- >>> mi = pd.MultiIndex.from_arrays([["a", "b", "c"], ["x", "y", "z"]]) >>> mi.searchsorted(("b", "y")) - 1 + array([1]) """ if not value: @@ -3876,7 +3877,8 @@ def searchsorted( sorter=sorter, ) result.append(np.intp(pos[0])) - + if len(result) == 1: + return result[0] return np.array(result, dtype=np.intp) def truncate(self, before=None, after=None) -> MultiIndex: From ffd99d877fba2100d1c64b6a28c2eab6b2278305 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Mon, 19 May 2025 23:05:41 +0530 Subject: [PATCH 11/13] updated whatsnew list, added more descriptive tests --- .gitignore | 2 - doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/base/test_misc.py | 7 --- pandas/tests/indexes/multi/test_indexing.py | 63 +++++++++++++++------ 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 5ab3710e0c8f9..d951f3fb9cbad 100644 --- a/.gitignore +++ b/.gitignore @@ -141,5 +141,3 @@ doc/source/savefig/ # Pyodide/WASM related files # ############################## /.pyodide-xbuildenv-* - -*.ipynb diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6642f5855f4fe..c8c784132c084 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -80,6 +80,7 @@ Other enhancements - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`) - Added support to read from Apache Iceberg tables with the new :func:`read_iceberg` function (:issue:`61383`) - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`) +- Implemented :meth:`MultiIndex.searchsorted` (:issue:`14833`) - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`) - Improved deprecation message for offset aliases (:issue:`60820`) - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`) @@ -87,7 +88,6 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index e8c57e5db340f..41f6d3a6c292d 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -147,13 +147,6 @@ def test_searchsorted(request, index_or_series_obj): # See gh-12238 obj = index_or_series_obj - # if isinstance(obj, pd.MultiIndex): - # # See gh-14833 - # request.applymarker( - # pytest.mark.xfail( - # reason="np.searchsorted doesn't work on pd.MultiIndex: GH 14833" - # ) - # ) if obj.dtype.kind == "c" and isinstance(obj, Index): # TODO: Should Series cases also raise? Looks like they use numpy # comparison semantics https://github.com/numpy/numpy/issues/15981 diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 672a733ea4d57..4b4524ecb198a 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -1031,26 +1031,55 @@ def test_get_loc_namedtuple_behaves_like_tuple(): assert idx.get_loc(("i5", "i6")) == 2 -def test_searchsorted(): - # GH14833 - mi = MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0)]) +@pytest.fixture +def mi(): + return MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0), ("b", 1), ("c", 0)]) - assert np.all(mi.searchsorted(("b", 0)) == 2) - assert np.all(mi.searchsorted(("b", 0), side="right") == 3) - assert np.all(mi.searchsorted(("a", 0)) == 0) - assert np.all(mi.searchsorted(("a", -1)) == 0) - assert np.all(mi.searchsorted(("c", 1)) == 5) - result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)]) - expected = np.array([1, 2, 4], dtype=np.intp) - tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize( + "value, side, expected", + [ + (("b", 0), "left", 2), + (("b", 0), "right", 3), + (("a", 0), "left", 0), + (("a", -1), "left", 0), + (("c", 1), "left", 5), + ], + ids=[ + "b-0-left", + "b-0-right", + "a-0-left", + "a--1-left", + "c-1-left-beyond", + ], +) +def test_searchsorted_single(mi, value, side, expected): + # GH14833 + result = mi.searchsorted(value, side=side) + assert np.all(result == expected) + - result = mi.searchsorted([("a", 1), ("b", 0), ("c", 0)], side="right") - expected = np.array([2, 3, 5], dtype=np.intp) +@pytest.mark.parametrize( + "values, side, expected", + [ + ([("a", 1), ("b", 0), ("c", 0)], "left", np.array([1, 2, 4], dtype=np.intp)), + ([("a", 1), ("b", 0), ("c", 0)], "right", np.array([2, 3, 5], dtype=np.intp)), + ], + ids=["list-left", "list-right"], +) +def test_searchsorted_list(mi, values, side, expected): + result = mi.searchsorted(values, side=side) tm.assert_numpy_array_equal(result, expected) - with pytest.raises(ValueError, match="side must be either 'left' or 'right'"): - mi.searchsorted(("a", 1), side="middle") - with pytest.raises(TypeError, match="value must be a tuple or list"): - mi.searchsorted("a") +@pytest.mark.parametrize( + "value, side, error_type, match", + [ + (("a", 1), "middle", ValueError, "side must be either 'left' or 'right'"), + ("a", "left", TypeError, "value must be a tuple or list"), + ], + ids=["invalid-side", "invalid-value-type"], +) +def test_searchsorted_invalid(mi, value, side, error_type, match): + with pytest.raises(error_type, match=match): + mi.searchsorted(value, side=side) From 5e2caa4a1d2baa7c104f5305b8e61f6c149a3097 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Tue, 20 May 2025 02:22:39 +0530 Subject: [PATCH 12/13] warning in rst file solved. closes issue 14833 --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c8c784132c084..621eb68a78aa8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -88,6 +88,7 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) + .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: From 6b0d0abd4a6c53caedcf620556f56687b686e7f1 Mon Sep 17 00:00:00 2001 From: GSAUC3 Date: Tue, 20 May 2025 21:28:26 +0530 Subject: [PATCH 13/13] added searchsorted method in indexing.rst --- doc/source/reference/indexing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 7a4bc0f467f9a..dd08a1efce758 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -293,6 +293,7 @@ MultiIndex components MultiIndex.copy MultiIndex.append MultiIndex.truncate + MultiIndex.searchsorted MultiIndex selecting ~~~~~~~~~~~~~~~~~~~~