Skip to content

Commit 214e474

Browse files
authored
ENH: Support Series[bool] as indexer for iloc.__getitem__ (#61162)
* updated indexing.py to allow iloc.__getitem__
* Updated test_iloc_mask test
* bugfix test_iloc_mask test
* bugfix test_iloc_mask
* whatsnew
* added test to test_iloc_mask
* formatting
* precommit
* added tests for series bool mask
* precommit
* reformatted tests
1 parent cc4f585 commit 214e474

File tree

3 files changed

+52
-25
lines changed

3 files changed

+52
-25
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Other enhancements
6868
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6969
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
7070
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
71+
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean :class:`Series` masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
7172
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7273
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7374
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)

Diff for: pandas/core/indexing.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -1582,11 +1582,7 @@ def _validate_key(self, key, axis: AxisInt) -> None:
15821582
if com.is_bool_indexer(key):
15831583
if hasattr(key, "index") and isinstance(key.index, Index):
15841584
if key.index.inferred_type == "integer":
1585-
raise NotImplementedError(
1586-
"iLocation based boolean "
1587-
"indexing on an integer type "
1588-
"is not available"
1589-
)
1585+
return
15901586
raise ValueError(
15911587
"iLocation based boolean indexing cannot use an indexable as a mask"
15921588
)

Diff for: pandas/tests/indexing/test_iloc.py

+50-20
Original file line numberDiff line numberDiff line change
@@ -726,15 +726,16 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value):
726726

727727
@pytest.mark.filterwarnings("ignore::UserWarning")
728728
def test_iloc_mask(self):
729-
# GH 3631, iloc with a mask (of a series) should raise
729+
# GH 60994, iloc accepts a boolean Series mask whose integer index aligns with the indexed object; other Series masks raise
730730
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
731731
mask = df.a % 2 == 0
732732
msg = "iLocation based boolean indexing cannot use an indexable as a mask"
733733
with pytest.raises(ValueError, match=msg):
734734
df.iloc[mask]
735+
735736
mask.index = range(len(mask))
736-
msg = "iLocation based boolean indexing on an integer type is not available"
737-
with pytest.raises(NotImplementedError, match=msg):
737+
msg = "Unalignable boolean Series provided as indexer"
738+
with pytest.raises(IndexingError, match=msg):
738739
df.iloc[mask]
739740

740741
# ndarray ok
@@ -753,18 +754,13 @@ def test_iloc_mask(self):
753754
(None, ".iloc"): "0b1100",
754755
("index", ""): "0b11",
755756
("index", ".loc"): "0b11",
756-
("index", ".iloc"): (
757-
"iLocation based boolean indexing cannot use an indexable as a mask"
758-
),
759-
("locs", ""): "Unalignable boolean Series provided as indexer "
760-
"(index of the boolean Series and of the indexed "
761-
"object do not match).",
762-
("locs", ".loc"): "Unalignable boolean Series provided as indexer "
763-
"(index of the boolean Series and of the "
764-
"indexed object do not match).",
765-
("locs", ".iloc"): (
766-
"iLocation based boolean indexing on an integer type is not available"
767-
),
757+
(
758+
"index",
759+
".iloc",
760+
): "iLocation based boolean indexing cannot use an indexable as a mask",
761+
("locs", ""): "Unalignable boolean Series provided as indexer",
762+
("locs", ".loc"): "Unalignable boolean Series provided as indexer",
763+
("locs", ".iloc"): "Unalignable boolean Series provided as indexer",
768764
}
769765

770766
# UserWarnings from reindex of a boolean mask
@@ -780,18 +776,52 @@ def test_iloc_mask(self):
780776
else:
781777
accessor = df
782778
answer = str(bin(accessor[mask]["nums"].sum()))
783-
except (ValueError, IndexingError, NotImplementedError) as err:
779+
except (ValueError, IndexingError) as err:
784780
answer = str(err)
785781

786782
key = (
787783
idx,
788784
method,
789785
)
790-
r = expected.get(key)
791-
if r != answer:
792-
raise AssertionError(
793-
f"[{key}] does not match [{answer}], received [{r}]"
786+
expected_result = expected.get(key)
787+
788+
# Successful numeric results must match exactly; error messages only need to contain the expected text
789+
if (
790+
idx is None or (idx == "index" and method != ".iloc")
791+
) and "0b" in expected_result:
792+
# For successful numeric results, exact match is needed
793+
assert expected_result == answer, (
794+
f"[{key}] does not match [{answer}]"
794795
)
796+
else:
797+
# For error messages, substring match is sufficient
798+
assert expected_result in answer, f"[{key}] not found in [{answer}]"
799+
800+
def test_iloc_with_numpy_bool_array(self):
801+
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
802+
result = df.iloc[np.array([True, False, True, False, True], dtype=bool)]
803+
expected = DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"])
804+
tm.assert_frame_equal(result, expected)
805+
806+
def test_iloc_series_mask_with_index_mismatch_raises(self):
807+
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
808+
mask = df.a % 2 == 0
809+
msg = "Unalignable boolean Series provided as indexer"
810+
with pytest.raises(IndexingError, match=msg):
811+
df.iloc[Series([True] * len(mask), dtype=bool)]
812+
813+
def test_iloc_series_mask_all_true(self):
814+
df = DataFrame(list(range(5)), columns=["a"])
815+
mask = Series([True] * len(df), dtype=bool)
816+
result = df.iloc[mask]
817+
tm.assert_frame_equal(result, df)
818+
819+
def test_iloc_series_mask_alternate_true(self):
820+
df = DataFrame(list(range(5)), columns=["a"])
821+
mask = Series([True, False, True, False, True], dtype=bool)
822+
result = df.iloc[mask]
823+
expected = DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4])
824+
tm.assert_frame_equal(result, expected)
795825

796826
def test_iloc_non_unique_indexing(self):
797827
# GH 4017, non-unique indexing (on the axis)

0 commit comments

Comments
 (0)