Skip to content

De-duplicate add_offset_array methods #19835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 24, 2018
86 changes: 58 additions & 28 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Base and utility classes for tseries type pandas objects.
"""
import warnings

import operator
from datetime import datetime, timedelta

from pandas import compat
Expand All @@ -25,13 +25,14 @@
is_integer_dtype,
is_object_dtype,
is_string_dtype,
is_period_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
from pandas.core.dtypes.missing import isna
from pandas.core import common as com, algorithms, ops
from pandas.core.algorithms import checked_add_with_arr
from pandas.errors import NullFrequencyError
from pandas.errors import NullFrequencyError, PerformanceWarning
import pandas.io.formats.printing as printing
from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs.period import Period
Expand Down Expand Up @@ -637,13 +638,32 @@ def _sub_datelike(self, other):
def _sub_period(self, other):
return NotImplemented

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
return NotImplemented
def _addsub_offset_array(self, other, op):
"""
Add or subtract array-like of DateOffset objects

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
return NotImplemented
Parameters
----------
other : Index, np.ndarray
object-dtype containing pd.DateOffset objects
op : {operator.add, operator.sub}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you assert that `op` is one of the expected operators (`operator.add` or `operator.sub`)?


Returns
-------
result : same class as self
"""
if len(other) == 1:
return op(self, other[0])

warnings.warn("Adding/subtracting array of DateOffsets to "
"{cls} not vectorized"
.format(cls=type(self).__name__), PerformanceWarning)

res_values = op(self.astype('O').values, np.array(other))
kwargs = {}
if not is_period_dtype(self):
kwargs['freq'] = 'infer'
return self.__class__(res_values, **kwargs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `._shallow_copy` here rather than constructing the result via `self.__class__`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it to `_shallow_copy` and it broke several tests, so I just pushed with it changed back to `self.__class__`. For some reason `DatetimeIndex._simple_new` returns an ndarray if its input values have object dtype, which is the case here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So try using `_shallow_copy_with_infer` instead; as a rule, we never use `self.__class__` directly in Index construction.


@classmethod
def _add_datetimelike_methods(cls):
Expand All @@ -660,25 +680,30 @@ def __add__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, ABCSeries):
return NotImplemented
elif is_timedelta64_dtype(other):

# scalar others
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
result = self._add_delta(other)
elif isinstance(other, (DateOffset, timedelta)):
elif isinstance(other, (datetime, np.datetime64)):
result = self._add_datelike(other)
elif is_integer(other):
# This check must come after the check for np.timedelta64
# as is_integer returns True for these
result = self.shift(other)

# array-like others
elif is_timedelta64_dtype(other):
# TimedeltaIndex, ndarray[timedelta64]
result = self._add_delta(other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
result = self._add_offset_array(other)
result = self._addsub_offset_array(other, operator.add)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if hasattr(other, '_add_delta'):
result = other._add_delta(self)
else:
raise TypeError("cannot add TimedeltaIndex and {typ}"
.format(typ=type(other)))
elif is_integer(other):
# This check must come after the check for timedelta64_dtype
# or else it will incorrectly catch np.timedelta64 objects
result = self.shift(other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._add_datelike(other)
elif isinstance(other, Index):
result = self._add_datelike(other)
elif is_integer_dtype(other) and self.freq is None:
Expand All @@ -704,28 +729,33 @@ def __sub__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, ABCSeries):
return NotImplemented
elif is_timedelta64_dtype(other):

# scalar others
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
result = self._add_delta(-other)
elif isinstance(other, (DateOffset, timedelta)):
elif isinstance(other, (datetime, np.datetime64)):
result = self._sub_datelike(other)
elif is_integer(other):
# This check must come after the check for np.timedelta64
# as is_integer returns True for these
result = self.shift(-other)
elif isinstance(other, Period):
result = self._sub_period(other)

# array-like others
elif is_timedelta64_dtype(other):
# TimedeltaIndex, ndarray[timedelta64]
result = self._add_delta(-other)
elif is_offsetlike(other):
# Array/Index of DateOffset objects
result = self._sub_offset_array(other)
result = self._addsub_offset_array(other, operator.sub)
elif isinstance(self, TimedeltaIndex) and isinstance(other, Index):
if not isinstance(other, TimedeltaIndex):
raise TypeError("cannot subtract TimedeltaIndex and {typ}"
.format(typ=type(other).__name__))
result = self._add_delta(-other)
elif isinstance(other, DatetimeIndex):
result = self._sub_datelike(other)
elif is_integer(other):
# This check must come after the check for timedelta64_dtype
# or else it will incorrectly catch np.timedelta64 objects
result = self.shift(-other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._sub_datelike(other)
elif isinstance(other, Period):
result = self._sub_period(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {typ1} and {typ2}"
.format(typ1=type(self).__name__,
Expand Down
23 changes: 0 additions & 23 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,29 +963,6 @@ def _add_offset(self, offset):
"or DatetimeIndex", PerformanceWarning)
return self.astype('O') + offset

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
if len(other) == 1:
return self + other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: pass freq='infer' like we do in _sub_offset_array?
# TODO: This works for __add__ but loses dtype in __sub__

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
if len(other) == 1:
return self - other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
res_values = self.astype('O').values - np.array(other)
return self.__class__(res_values, freq='infer')

def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.io.formats.format import _get_format_datetime64_from_values
format = _get_format_datetime64_from_values(self, date_format)
Expand Down
23 changes: 0 additions & 23 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
from pandas.util._decorators import (Appender, Substitution, cache_readonly,
deprecate_kwarg)
from pandas.compat import zip, u
from pandas.errors import PerformanceWarning

import pandas.core.indexes.base as ibase
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
Expand Down Expand Up @@ -747,28 +746,6 @@ def _sub_period(self, other):
# result must be Int64Index or Float64Index
return Index(new_data)

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
if len(other) == 1:
return self + other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{cls} not vectorized"
.format(cls=type(self).__name__), PerformanceWarning)
res_values = self.astype('O').values + np.array(other)
return self.__class__(res_values)

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
if len(other) == 1:
return self - other[0]
else:
warnings.warn("Adding/subtracting array of DateOffsets to "
"{cls} not vectorized"
.format(cls=type(self).__name__), PerformanceWarning)
res_values = self.astype('O').values - np.array(other)
return self.__class__(res_values)

def shift(self, n):
"""
Specialized shift which produces an PeriodIndex
Expand Down
38 changes: 5 additions & 33 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
""" implement the TimedeltaIndex """

from datetime import timedelta
import warnings

import numpy as np
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -433,43 +432,16 @@ def _sub_datelike(self, other):
else:
raise TypeError("cannot subtract a datelike from a TimedeltaIndex")

def _add_offset_array(self, other):
# Array/Index of DateOffset objects
def _addsub_offset_array(self, other, op):
# Add or subtract Array-like of DateOffset objects
try:
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if len(other) == 1:
return self + other[0]
else:
from pandas.errors import PerformanceWarning
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
return self.astype('O') + np.array(other)
# TODO: pass freq='infer' like we do in _sub_offset_array?
# TODO: This works for __add__ but loses dtype in __sub__
except AttributeError:
raise TypeError("Cannot add non-tick DateOffset to TimedeltaIndex")

def _sub_offset_array(self, other):
# Array/Index of DateOffset objects
try:
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
if len(other) == 1:
return self - other[0]
else:
from pandas.errors import PerformanceWarning
warnings.warn("Adding/subtracting array of DateOffsets to "
"{} not vectorized".format(type(self)),
PerformanceWarning)
res_values = self.astype('O').values - np.array(other)
return self.__class__(res_values, freq='infer')
return DatetimeIndexOpsMixin._addsub_offset_array(self, other, op)
except AttributeError:
raise TypeError("Cannot subtrack non-tick DateOffset from"
" TimedeltaIndex")
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
.format(cls=type(self).__name__))

def _format_native_types(self, na_rep=u('NaT'),
date_format=None, **kwargs):
Expand Down