Skip to content

Commit 7e8ff50

Browse files
mroeschke authored and pmhatre1 committed
PERF: Allow ensure_index_from_sequence to return RangeIndex (pandas-dev#57786)
1 parent 8bd68f5 commit 7e8ff50

File tree

3 files changed

+50
-26
lines changed

3 files changed

+50
-26
lines changed

pandas/core/indexes/base.py

+41-3
Original file line numberDiff line numberDiff line change
@@ -7135,6 +7135,43 @@ def shape(self) -> Shape:
71357135
return (len(self),)
71367136

71377137

7138+
def maybe_sequence_to_range(sequence) -> Any | range:
    """
    Convert a 1D, non-pandas sequence to a range if possible.

    Returns the input if not possible.

    Parameters
    ----------
    sequence : 1D sequence
        Values to inspect. Series and Index objects are returned untouched,
        as is anything that ``np.asarray`` does not turn into a signed
        integer array.

    Returns
    -------
    Any : input or range
        A ``range`` when the values are equally spaced signed integers with a
        non-zero step (or an empty signed-integer array); otherwise the
        original ``sequence`` object.
    """
    if isinstance(sequence, (ABCSeries, Index)):
        return sequence
    np_sequence = np.asarray(sequence)
    if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
        return sequence
    elif len(np_sequence) == 0:
        return range(0)

    # Do the scalar arithmetic with Python ints. NumPy fixed-width scalars
    # wrap around on overflow (e.g. int8 100 + 50 -> -106), which would
    # silently produce a wrong (often empty) range.
    n_values = len(np_sequence)
    start = int(np_sequence[0])
    last = int(np_sequence[-1])
    step = int(np_sequence[1]) - start
    if step == 0:
        return sequence
    stop = last + step

    # Conservative fallback: only emit ranges whose stop/step are
    # representable as int64; otherwise hand the input back unchanged.
    int64_info = np.iinfo(np.int64)
    if not (
        int64_info.min <= step <= int64_info.max
        and int64_info.min <= stop <= int64_info.max
    ):
        return sequence

    if n_values == 2:
        # Two distinct integers always form a range; no spacing check needed.
        return range(start, stop, step)

    # Exact endpoint check: rejects inputs whose wrapped offsets could
    # otherwise masquerade as an arithmetic progression below.
    if last - start != step * (n_values - 1):
        return sequence

    # Widen to int64 so narrow dtypes do not wrap when shifted by ``start``.
    offsets = np_sequence.astype(np.int64, copy=False) - start
    maybe_range_indexer, remainder = np.divmod(offsets, step)
    if (
        lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
        and not remainder.any()
    ):
        return range(start, stop, step)
    else:
        return sequence
7173+
7174+
71387175
def ensure_index_from_sequences(sequences, names=None) -> Index:
71397176
"""
71407177
Construct an index from sequences of data.
@@ -7153,8 +7190,8 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
71537190
71547191
Examples
71557192
--------
7156-
>>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
7157-
Index([1, 2, 3], dtype='int64', name='name')
7193+
>>> ensure_index_from_sequences([[1, 2, 4]], names=["name"])
7194+
Index([1, 2, 4], dtype='int64', name='name')
71587195
71597196
>>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
71607197
MultiIndex([('a', 'a'),
@@ -7170,8 +7207,9 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
71707207
if len(sequences) == 1:
71717208
if names is not None:
71727209
names = names[0]
7173-
return Index(sequences[0], name=names)
7210+
return Index(maybe_sequence_to_range(sequences[0]), name=names)
71747211
else:
7212+
# TODO: Apply maybe_sequence_to_range to sequences?
71757213
return MultiIndex.from_arrays(sequences, names=names)
71767214

71777215

pandas/core/indexes/range.py

+4-20
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
doc,
3030
)
3131

32-
from pandas.core.dtypes import missing
3332
from pandas.core.dtypes.base import ExtensionDtype
3433
from pandas.core.dtypes.common import (
3534
ensure_platform_int,
@@ -475,28 +474,13 @@ def _shallow_copy(self, values, name: Hashable = no_default):
475474
if values.dtype.kind == "i" and values.ndim == 1:
476475
# GH 46675 & 43885: If values is equally spaced, return a
477476
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478-
if len(values) == 0:
479-
return type(self)._simple_new(_empty_range, name=name)
480-
elif len(values) == 1:
477+
if len(values) == 1:
481478
start = values[0]
482479
new_range = range(start, start + self.step, self.step)
483480
return type(self)._simple_new(new_range, name=name)
484-
diff = values[1] - values[0]
485-
if not missing.isna(diff) and diff != 0:
486-
if len(values) == 2:
487-
# Can skip is_range_indexer check
488-
new_range = range(values[0], values[-1] + diff, diff)
489-
return type(self)._simple_new(new_range, name=name)
490-
else:
491-
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
492-
if (
493-
lib.is_range_indexer(
494-
maybe_range_indexer, len(maybe_range_indexer)
495-
)
496-
and not remainder.any()
497-
):
498-
new_range = range(values[0], values[-1] + diff, diff)
499-
return type(self)._simple_new(new_range, name=name)
481+
maybe_range = ibase.maybe_sequence_to_range(values)
482+
if isinstance(maybe_range, range):
483+
return type(self)._simple_new(maybe_range, name=name)
500484
return self._constructor._simple_new(values, name=name)
501485

502486
def _view(self) -> Self:

pandas/tests/indexes/test_base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1514,8 +1514,10 @@ class TestIndexUtils:
15141514
@pytest.mark.parametrize(
15151515
"data, names, expected",
15161516
[
1517-
([[1, 2, 3]], None, Index([1, 2, 3])),
1518-
([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
1517+
([[1, 2, 4]], None, Index([1, 2, 4])),
1518+
([[1, 2, 4]], ["name"], Index([1, 2, 4], name="name")),
1519+
([[1, 2, 3]], None, RangeIndex(1, 4)),
1520+
([[1, 2, 3]], ["name"], RangeIndex(1, 4, name="name")),
15191521
(
15201522
[["a", "a"], ["c", "d"]],
15211523
None,
@@ -1530,7 +1532,7 @@ class TestIndexUtils:
15301532
)
15311533
def test_ensure_index_from_sequences(self, data, names, expected):
15321534
result = ensure_index_from_sequences(data, names)
1533-
tm.assert_index_equal(result, expected)
1535+
tm.assert_index_equal(result, expected, exact=True)
15341536

15351537
def test_ensure_index_mixed_closed_intervals(self):
15361538
# GH27172

0 commit comments

Comments
 (0)