Skip to content

Commit 3a78f7f

Browse files
committed
Merge pull request #6364 from jreback/ix_perf
PERF: change Series indexing on multi-indexes to use a fast path (GH5567)
2 parents 217f2f8 + 4be0c0b commit 3a78f7f

File tree

9 files changed

+55
-28
lines changed

9 files changed

+55
-28
lines changed

Diff for: doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ Improvements to existing features
8181
- Allow multi-index slicers (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :issue:`5641`)
8282
- improve performance of slice indexing on Series with string keys (:issue:`6341`)
8383
- implement joining a single-level indexed DataFrame on a matching column of a multi-indexed DataFrame (:issue:`3662`)
84+
- Performance improvement in indexing into a multi-indexed Series (:issue:`5567`)
8485

8586
.. _release.bug_fixes-0.14.0:
8687

Diff for: pandas/core/common.py

+10
Original file line numberDiff line numberDiff line change
@@ -1557,6 +1557,16 @@ def _maybe_box(indexer, values, obj, key):
15571557
# return the value
15581558
return values
15591559

1560+
def _maybe_box_datetimelike(value):
1561+
# turn a datetime like into a Timestamp/timedelta as needed
1562+
1563+
if isinstance(value, np.datetime64):
1564+
value = tslib.Timestamp(value)
1565+
elif isinstance(value, np.timedelta64):
1566+
pass
1567+
1568+
return value
1569+
15601570
_values_from_object = lib.values_from_object
15611571

15621572
def _possibly_convert_objects(values, convert_dates=True,

Diff for: pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,7 @@ def _ixs(self, i, axis=0, copy=False):
15881588
result = self.reindex(i, takeable=True)
15891589
copy=True
15901590
else:
1591-
new_values, copy = self._data.fast_2d_xs(i, copy=copy)
1591+
new_values, copy = self._data.fast_xs(i, copy=copy)
15921592
result = Series(new_values, index=self.columns,
15931593
name=self.index[i], dtype=new_values.dtype)
15941594
result._set_is_copy(self, copy=copy)

Diff for: pandas/core/generic.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
from pandas import compat, _np_version_under1p7
2222
from pandas.compat import map, zip, lrange, string_types, isidentifier
2323
from pandas.core.common import (isnull, notnull, is_list_like,
24-
_values_from_object, _maybe_promote, ABCSeries,
25-
SettingWithCopyError, SettingWithCopyWarning)
24+
_values_from_object, _maybe_promote, _maybe_box_datetimelike,
25+
ABCSeries, SettingWithCopyError, SettingWithCopyWarning)
2626
import pandas.core.nanops as nanops
2727
from pandas.util.decorators import Appender, Substitution
2828
from pandas.core import config
@@ -1304,7 +1304,12 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True):
13041304

13051305
if np.isscalar(loc):
13061306
from pandas import Series
1307-
new_values, copy = self._data.fast_2d_xs(loc, copy=copy)
1307+
new_values, copy = self._data.fast_xs(loc, copy=copy)
1308+
1309+
# may need to box a datelike-scalar
1310+
if not is_list_like(new_values):
1311+
return _maybe_box_datetimelike(new_values)
1312+
13081313
result = Series(new_values, index=self.columns,
13091314
name=self.index[loc])
13101315

Diff for: pandas/core/indexing.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,15 @@ def __getitem__(self, key):
7070
return self._getitem_axis(key, axis=0)
7171

7272
def _get_label(self, label, axis=0):
73-
# ueber-hack
7473
if self.ndim == 1:
75-
return self.obj[label]
74+
# for perf reasons we want to try _xs first
75+
# as its basically direct indexing
76+
# but will fail when the index is not present
77+
# see GH5567
78+
try:
79+
return self.obj._xs(label, axis=axis, copy=False)
80+
except:
81+
return self.obj[label]
7682
elif (isinstance(label, tuple) and
7783
isinstance(label[axis], slice)):
7884
raise IndexingError('no slices here, handle elsewhere')

Diff for: pandas/core/internals.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -2829,7 +2829,7 @@ def xs(self, key, axis=1, copy=True, takeable=False):
28292829

28302830
return self.__class__(new_blocks, new_axes)
28312831

2832-
def fast_2d_xs(self, loc, copy=False):
2832+
def fast_xs(self, loc, copy=False):
28332833
"""
28342834
get a cross sectional for a given location in the
28352835
items ; handle dups
@@ -3757,6 +3757,12 @@ def _consolidate_check(self):
37573757
def _consolidate_inplace(self):
37583758
pass
37593759

3760+
def fast_xs(self, loc, copy=False):
3761+
"""
3762+
fast path for getting a cross-section
3763+
"""
3764+
result = self._block.values[loc]
3765+
return result, False
37603766

37613767
def construction_error(tot_items, block_shape, axes, e=None):
37623768
""" raise a helpful message about our construction """

Diff for: pandas/core/series.py

-20
Original file line numberDiff line numberDiff line change
@@ -427,26 +427,6 @@ def _unpickle_series_compat(self, state):
427427
def axes(self):
428428
return [self.index]
429429

430-
def _maybe_box(self, values):
431-
""" generically box the values """
432-
433-
if isinstance(values, self.__class__):
434-
return values
435-
elif not hasattr(values, '__iter__'):
436-
v = lib.infer_dtype([values])
437-
if v == 'datetime':
438-
return lib.Timestamp(v)
439-
return values
440-
441-
v = lib.infer_dtype(values)
442-
if v == 'datetime':
443-
return lib.map_infer(values, lib.Timestamp)
444-
445-
if isinstance(values, np.ndarray):
446-
return self.__class__(values)
447-
448-
return values
449-
450430
def _ixs(self, i, axis=0):
451431
"""
452432
Return the i-th value or values in the Series by location

Diff for: vb_suite/indexing.py

+19
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,25 @@
9191
name='indexing_frame_get_value',
9292
start_date=datetime(2011, 11, 12))
9393

94+
setup = common_setup + """
95+
mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)])
96+
s = Series(np.random.randn(1000000), index=mi)
97+
"""
98+
99+
series_xs_mi_ix = Benchmark("s.ix[999]", setup,
100+
name='series_xs_mi_ix',
101+
start_date=datetime(2013, 1, 1))
102+
103+
setup = common_setup + """
104+
mi = MultiIndex.from_tuples([(x,y) for x in range(1000) for y in range(1000)])
105+
s = Series(np.random.randn(1000000), index=mi)
106+
df = DataFrame(s)
107+
"""
108+
109+
frame_xs_mi_ix = Benchmark("df.ix[999]", setup,
110+
name='frame_xs_mi_ix',
111+
start_date=datetime(2013, 1, 1))
112+
94113
#----------------------------------------------------------------------
95114
# Boolean DataFrame row selection
96115

Diff for: vb_suite/join_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,5 +220,5 @@ def sample(values, k):
220220
"""
221221

222222
join_non_unique_equal = Benchmark('fracofday * temp[fracofday.index]', setup,
223-
start_date=datetime(2013 1, 1))
223+
start_date=datetime(2013, 1, 1))
224224

0 commit comments

Comments
 (0)