Skip to content

Commit b70a1b1

Browse files
jbrockmendel authored and feefladder committed
BUG: string slicing on MultiIndex DatetimeIndex level (pandas-dev#42476)
1 parent 1bd20a8 commit b70a1b1

File tree

7 files changed

+65
-68
lines changed

7 files changed

+65
-68
lines changed

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ Indexing
217217
^^^^^^^^
218218
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
219219
- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`)
220+
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
221+
-
220222

221223
Missing
222224
^^^^^^^

pandas/core/indexes/base.py

-11
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
DtypeObj,
4545
F,
4646
Shape,
47-
T,
4847
npt,
4948
)
5049
from pandas.compat.numpy import function as nv
@@ -3719,16 +3718,6 @@ def _filter_indexer_tolerance(
37193718
# --------------------------------------------------------------------
37203719
# Indexer Conversion Methods
37213720

3722-
def _get_partial_string_timestamp_match_key(self, key: T) -> T:
3723-
"""
3724-
Translate any partial string timestamp matches in key, returning the
3725-
new key.
3726-
3727-
Only relevant for MultiIndex.
3728-
"""
3729-
# GH#10331
3730-
return key
3731-
37323721
@final
37333722
def _validate_positional_slice(self, key: slice) -> None:
37343723
"""

pandas/core/indexes/multi.py

+41-33
Original file line numberDiff line numberDiff line change
@@ -2581,35 +2581,6 @@ def _get_indexer_level_0(self, target) -> np.ndarray:
25812581
ci = Index(cat)
25822582
return ci.get_indexer_for(target)
25832583

2584-
def _get_partial_string_timestamp_match_key(self, key):
2585-
"""
2586-
Translate any partial string timestamp matches in key, returning the
2587-
new key.
2588-
2589-
Only relevant for MultiIndex.
2590-
"""
2591-
# GH#10331
2592-
if isinstance(key, str) and self.levels[0]._supports_partial_string_indexing:
2593-
# Convert key '2016-01-01' to
2594-
# ('2016-01-01'[, slice(None, None, None)]+)
2595-
key = (key,) + (slice(None),) * (len(self.levels) - 1)
2596-
2597-
if isinstance(key, tuple):
2598-
# Convert (..., '2016-01-01', ...) in tuple to
2599-
# (..., slice('2016-01-01', '2016-01-01', None), ...)
2600-
new_key = []
2601-
for i, component in enumerate(key):
2602-
if (
2603-
isinstance(component, str)
2604-
and self.levels[i]._supports_partial_string_indexing
2605-
):
2606-
new_key.append(slice(component, component, None))
2607-
else:
2608-
new_key.append(component)
2609-
key = tuple(new_key)
2610-
2611-
return key
2612-
26132584
def get_slice_bound(
26142585
self, label: Hashable | Sequence[Hashable], side: str, kind: str | None = None
26152586
) -> int:
@@ -2858,7 +2829,12 @@ def _maybe_to_slice(loc):
28582829
)
28592830

28602831
if keylen == self.nlevels and self.is_unique:
2861-
return self._engine.get_loc(key)
2832+
try:
2833+
return self._engine.get_loc(key)
2834+
except TypeError:
2835+
# e.g. partial string slicing
2836+
loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
2837+
return loc
28622838

28632839
# -- partial selection or non-unique index
28642840
# break the key into 2 parts based on the lexsort_depth of the index;
@@ -3008,6 +2984,10 @@ def maybe_mi_droplevels(indexer, levels):
30082984
return (self._engine.get_loc(key), None)
30092985
except KeyError as err:
30102986
raise KeyError(key) from err
2987+
except TypeError:
2988+
# e.g. partial string indexing
2989+
# test_partial_string_timestamp_multiindex
2990+
pass
30112991

30122992
# partial selection
30132993
indexer = self.get_loc(key)
@@ -3019,7 +2999,19 @@ def maybe_mi_droplevels(indexer, levels):
30192999

30203000
# TODO: in some cases we still need to drop some levels,
30213001
# e.g. test_multiindex_perf_warn
3022-
ilevels = []
3002+
# test_partial_string_timestamp_multiindex
3003+
ilevels = [
3004+
i
3005+
for i in range(len(key))
3006+
if (
3007+
not isinstance(key[i], str)
3008+
or not self.levels[i]._supports_partial_string_indexing
3009+
)
3010+
and key[i] != slice(None, None)
3011+
]
3012+
if len(ilevels) == self.nlevels:
3013+
# TODO: why?
3014+
ilevels = []
30233015
return indexer, maybe_mi_droplevels(indexer, ilevels)
30243016

30253017
else:
@@ -3060,6 +3052,16 @@ def maybe_mi_droplevels(indexer, levels):
30603052
return indexer, maybe_mi_droplevels(indexer, ilevels)
30613053
else:
30623054
indexer = self._get_level_indexer(key, level=level)
3055+
if (
3056+
isinstance(key, str)
3057+
and self.levels[level]._supports_partial_string_indexing
3058+
):
3059+
# check to see if we did an exact lookup vs sliced
3060+
check = self.levels[level].get_loc(key)
3061+
if not is_integer(check):
3062+
# e.g. test_partial_string_timestamp_multiindex
3063+
return indexer, self[indexer]
3064+
30633065
return indexer, maybe_mi_droplevels(indexer, [level])
30643066

30653067
def _get_level_indexer(
@@ -3157,15 +3159,21 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
31573159

31583160
if level > 0 or self._lexsort_depth == 0:
31593161
# Desired level is not sorted
3162+
if isinstance(idx, slice):
3163+
locs = (level_codes >= idx.start) & (level_codes < idx.stop)
3164+
return locs
3165+
31603166
locs = np.array(level_codes == idx, dtype=bool, copy=False)
31613167
if not locs.any():
31623168
# The label is present in self.levels[level] but unused:
31633169
raise KeyError(key)
31643170
return locs
31653171

31663172
if isinstance(idx, slice):
3167-
start = idx.start
3168-
end = idx.stop
3173+
# e.g. test_partial_string_timestamp_multiindex
3174+
start = level_codes.searchsorted(idx.start, side="left")
3175+
# NB: "left" here bc of slice semantics
3176+
end = level_codes.searchsorted(idx.stop, side="left")
31693177
else:
31703178
start = level_codes.searchsorted(idx, side="left")
31713179
end = level_codes.searchsorted(idx, side="right")

pandas/core/indexing.py

-7
Original file line numberDiff line numberDiff line change
@@ -1129,12 +1129,6 @@ def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
11291129
try:
11301130
# fast path for series or for tup devoid of slices
11311131
return self._get_label(tup, axis=axis)
1132-
except TypeError as err:
1133-
# slices are unhashable
1134-
# FIXME: this raises when we have a DatetimeIndex first level and a
1135-
# string for the first tup entry
1136-
# see test_partial_slicing_with_multiindex
1137-
raise IndexingError("No label returned") from err
11381132

11391133
except KeyError as ek:
11401134
# raise KeyError if number of indexers match
@@ -1149,7 +1143,6 @@ def _getitem_axis(self, key, axis: int):
11491143
key = list(key)
11501144

11511145
labels = self.obj._get_axis(axis)
1152-
key = labels._get_partial_string_timestamp_match_key(key)
11531146

11541147
if isinstance(key, slice):
11551148
self._validate_key(key, axis)

pandas/tests/indexes/datetimes/test_partial_slicing.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
date_range,
1717
)
1818
import pandas._testing as tm
19-
from pandas.core.indexing import IndexingError
2019

2120

2221
class TestSlicing:
@@ -337,11 +336,10 @@ def test_partial_slicing_with_multiindex(self):
337336
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
338337
tm.assert_series_equal(result, expected)
339338

340-
# this is an IndexingError as we don't do partial string selection on
341-
# multi-levels.
342-
msg = "Too many indexers"
343-
with pytest.raises(IndexingError, match=msg):
344-
df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
339+
# partial string indexing on first level, scalar indexing on the other two
340+
result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
341+
expected = df_multi.iloc[:1].droplevel([1, 2])
342+
tm.assert_frame_equal(result, expected)
345343

346344
def test_partial_slicing_with_multiindex_series(self):
347345
# GH 4294

pandas/tests/indexes/multi/test_partial_indexing.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ def test_partial_string_timestamp_multiindex(df):
7272

7373
# partial string match on date and hour, from middle
7474
result = df.loc["2016-01-02 12"]
75-
expected = df.iloc[9:12]
75+
# hourly resolution, same as index.levels[0], so we are _not_ slicing on
76+
# that level, so that level gets dropped
77+
expected = df.iloc[9:12].droplevel(0)
7678
tm.assert_frame_equal(result, expected)
7779

7880
# partial string match on secondary index
@@ -81,11 +83,14 @@ def test_partial_string_timestamp_multiindex(df):
8183
tm.assert_frame_equal(result, expected)
8284

8385
# tuple selector with partial string match on date
86+
# "2016-01-01" has daily resolution, so _is_ a slice on the first level.
8487
result = df.loc[("2016-01-01", "a"), :]
8588
expected = df.iloc[[0, 3]]
89+
expected = df.iloc[[0, 3]].droplevel(1)
8690
tm.assert_frame_equal(result, expected)
8791

88-
# Slicing date on first level should break (of course)
92+
# Slicing date on first level should break (of course) bc the DTI is the
93+
# second level on df_swap
8994
with pytest.raises(KeyError, match="'2016-01-01'"):
9095
df_swap.loc["2016-01-01"]
9196

pandas/tests/indexing/multiindex/test_loc.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -554,15 +554,17 @@ def test_loc_period_string_indexing():
554554
),
555555
)
556556
result = df.loc[("2013Q1", 1111), "OMS"]
557-
expected = Series(
558-
[np.nan],
559-
dtype=object,
560-
name="OMS",
561-
index=MultiIndex.from_tuples(
562-
[(pd.Period("2013Q1"), 1111)], names=["Period", "CVR"]
563-
),
564-
)
565-
tm.assert_series_equal(result, expected)
557+
558+
alt = df.loc[(a[0], 1111), "OMS"]
559+
assert np.isnan(alt)
560+
561+
# Because the resolution of the string matches, it is an exact lookup,
562+
# not a slice
563+
assert np.isnan(result)
564+
565+
# TODO: should it figure this out?
566+
# alt = df.loc["2013Q1", 1111, "OMS"]
567+
# assert np.isnan(alt)
566568

567569

568570
def test_loc_datetime_mask_slicing():

0 commit comments

Comments
 (0)