-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: implement DatetimeLikeArray #19902
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 31 commits
df9f894
004f137
db72494
80b525e
54009d4
65dc829
4c5a05c
b91ddac
080e477
e19f70a
47d365e
3a67bce
7fc73eb
9edd9bc
1236273
1ab6263
9a28188
6b17031
b03689a
a055d40
d1faeb6
fcb8d6a
8cee92c
375329e
71dfe08
59c60a2
9db2b78
308c25b
94bdfcb
d589e2a
0d4f48a
1b910c7
cece116
828022a
ed83046
c1934db
a684c2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
from .base import ExtensionArray # noqa | ||
from .categorical import Categorical # noqa | ||
from .datetimes import DatetimeArrayMixin # noqa | ||
from .timedeltas import TimedeltaArrayMixin # noqa | ||
from .periods import PeriodArrayMixin # noqa |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import numpy as np | ||
|
||
from pandas._libs import iNaT | ||
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds | ||
|
||
from pandas.tseries import frequencies | ||
|
||
import pandas.core.common as com | ||
from pandas.core.algorithms import checked_add_with_arr | ||
|
||
|
||
class DatetimeLikeArrayMixin(object): | ||
""" | ||
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray | ||
|
||
Assumes that __new__/__init__ defines: | ||
_data | ||
_freq | ||
|
||
and that the inheriting class has methods: | ||
_validate_frequency | ||
""" | ||
|
||
@property | ||
def _box_func(self): | ||
""" | ||
box function to get object from internal representation | ||
""" | ||
raise com.AbstractMethodError(self) | ||
|
||
def __iter__(self): | ||
return (self._box_func(v) for v in self.asi8) | ||
|
||
@property | ||
def values(self): | ||
""" return the underlying data as an ndarray """ | ||
return self._data.view(np.ndarray) | ||
|
||
@property | ||
def asi8(self): | ||
# do not cache or you'll create a memory leak | ||
return self.values.view('i8') | ||
|
||
# ------------------------------------------------------------------ | ||
# Null Handling | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def _isnan(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm open to the name-change. This is the existing name. |
||
""" return if each value is nan""" | ||
return (self.asi8 == iNaT) | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def hasnans(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same: is this needed on the arrays? |
||
""" return if I have any nans; enables various perf speedups """ | ||
return self._isnan.any() | ||
|
||
def _maybe_mask_results(self, result, fill_value=None, convert=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is this different from |
||
""" | ||
Parameters | ||
---------- | ||
result : a ndarray | ||
convert : string/dtype or None | ||
|
||
Returns | ||
------- | ||
result : ndarray with values replaced by the fill_value | ||
|
||
mask the result if needed, convert to the provided dtype if it's not | ||
None | ||
|
||
This is an internal routine | ||
""" | ||
|
||
if self.hasnans: | ||
if convert: | ||
result = result.astype(convert) | ||
if fill_value is None: | ||
fill_value = np.nan | ||
result[self._isnan] = fill_value | ||
return result | ||
|
||
# ------------------------------------------------------------------ | ||
# Frequency Properties/Methods | ||
|
||
@property | ||
def freq(self): | ||
"""Return the frequency object if it is set, otherwise None""" | ||
return self._freq | ||
|
||
@freq.setter | ||
def freq(self, value): | ||
if value is not None: | ||
value = frequencies.to_offset(value) | ||
self._validate_frequency(self, value) | ||
|
||
self._freq = value | ||
|
||
@property | ||
def freqstr(self): | ||
""" | ||
Return the frequency object as a string if it's set, otherwise None | ||
""" | ||
if self.freq is None: | ||
return None | ||
return self.freq.freqstr | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def inferred_freq(self): | ||
""" | ||
Tries to return a string representing a frequency guess, | ||
generated by infer_freq. Returns None if it can't autodetect the | ||
frequency. | ||
""" | ||
try: | ||
return frequencies.infer_freq(self) | ||
except ValueError: | ||
return None | ||
|
||
# ------------------------------------------------------------------ | ||
# Arithmetic Methods | ||
|
||
def _add_datelike(self, other): | ||
raise TypeError("cannot add {cls} and {typ}" | ||
.format(cls=type(self).__name__, | ||
typ=type(other).__name__)) | ||
|
||
def _sub_datelike(self, other): | ||
raise com.AbstractMethodError(self) | ||
|
||
def _sub_period(self, other): | ||
return NotImplemented | ||
|
||
def _add_offset(self, offset): | ||
raise com.AbstractMethodError(self) | ||
|
||
def _add_delta(self, other): | ||
return NotImplemented | ||
|
||
def _add_delta_td(self, other): | ||
""" | ||
Add a delta of a timedeltalike | ||
return the i8 result view | ||
""" | ||
inc = delta_to_nanoseconds(other) | ||
new_values = checked_add_with_arr(self.asi8, inc, | ||
arr_mask=self._isnan).view('i8') | ||
if self.hasnans: | ||
new_values[self._isnan] = iNaT | ||
return new_values.view('i8') | ||
|
||
def _add_delta_tdi(self, other): | ||
""" | ||
Add a delta of a TimedeltaIndex | ||
return the i8 result view | ||
""" | ||
if not len(self) == len(other): | ||
raise ValueError("cannot add indices of unequal length") | ||
|
||
self_i8 = self.asi8 | ||
other_i8 = other.asi8 | ||
new_values = checked_add_with_arr(self_i8, other_i8, | ||
arr_mask=self._isnan, | ||
b_mask=other._isnan) | ||
if self.hasnans or other.hasnans: | ||
mask = (self._isnan) | (other._isnan) | ||
new_values[mask] = iNaT | ||
return new_values.view('i8') |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# -*- coding: utf-8 -*- | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
from pandas._libs.tslib import Timestamp, NaT, iNaT | ||
from pandas._libs.tslibs import timezones | ||
|
||
from pandas.util._decorators import cache_readonly | ||
|
||
from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
|
||
from .datetimelike import DatetimeLikeArrayMixin | ||
|
||
|
||
class DatetimeArrayMixin(DatetimeLikeArrayMixin): | ||
""" | ||
Assumes that subclass __new__/__init__ defines: | ||
tz | ||
_freq | ||
_data | ||
""" | ||
|
||
# ----------------------------------------------------------------- | ||
# Descriptive Properties | ||
|
||
@property | ||
def _box_func(self): | ||
return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) | ||
|
||
@cache_readonly | ||
def dtype(self): | ||
if self.tz is None: | ||
return _NS_DTYPE | ||
return DatetimeTZDtype('ns', self.tz) | ||
|
||
@property | ||
def tzinfo(self): | ||
""" | ||
Alias for tz attribute | ||
""" | ||
return self.tz | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def _timezone(self): | ||
""" Comparable timezone both for pytz / dateutil""" | ||
return timezones.get_timezone(self.tzinfo) | ||
|
||
@property | ||
def offset(self): | ||
"""get/set the frequency of the instance""" | ||
msg = ('DatetimeIndex.offset has been deprecated and will be removed ' | ||
'in a future version; use DatetimeIndex.freq instead.') | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
return self.freq | ||
|
||
@offset.setter | ||
def offset(self, value): | ||
"""get/set the frequency of the instance""" | ||
msg = ('DatetimeIndex.offset has been deprecated and will be removed ' | ||
'in a future version; use DatetimeIndex.freq instead.') | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
self.freq = value | ||
|
||
# ----------------------------------------------------------------- | ||
# Comparison Methods | ||
|
||
def _has_same_tz(self, other): | ||
zzone = self._timezone | ||
|
||
# vzone shouldn't be None if value is non-datetime like | ||
if isinstance(other, np.datetime64): | ||
# convert to Timestamp as np.datetime64 doesn't have tz attr | ||
other = Timestamp(other) | ||
vzone = timezones.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) | ||
return zzone == vzone | ||
|
||
def _assert_tzawareness_compat(self, other): | ||
# adapted from _Timestamp._assert_tzawareness_compat | ||
other_tz = getattr(other, 'tzinfo', None) | ||
if is_datetime64tz_dtype(other): | ||
# Get tzinfo from Series dtype | ||
other_tz = other.dtype.tz | ||
if other is NaT: | ||
# pd.NaT quacks both aware and naive | ||
pass | ||
elif self.tz is None: | ||
if other_tz is not None: | ||
raise TypeError('Cannot compare tz-naive and tz-aware ' | ||
'datetime-like objects.') | ||
elif other_tz is None: | ||
raise TypeError('Cannot compare tz-naive and tz-aware ' | ||
'datetime-like objects') | ||
|
||
# ----------------------------------------------------------------- | ||
# Arithmetic Methods | ||
|
||
def _sub_datelike_dti(self, other): | ||
"""subtraction of two DatetimeIndexes""" | ||
if not len(self) == len(other): | ||
raise ValueError("cannot add indices of unequal length") | ||
|
||
self_i8 = self.asi8 | ||
other_i8 = other.asi8 | ||
new_values = self_i8 - other_i8 | ||
if self.hasnans or other.hasnans: | ||
mask = (self._isnan) | (other._isnan) | ||
new_values[mask] = iNaT | ||
return new_values.view('timedelta64[ns]') |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from pandas._libs.tslibs.period import Period | ||
|
||
from pandas.util._decorators import cache_readonly | ||
|
||
from pandas.core.dtypes.dtypes import PeriodDtype | ||
|
||
from .datetimelike import DatetimeLikeArrayMixin | ||
|
||
|
||
class PeriodArrayMixin(DatetimeLikeArrayMixin): | ||
@property | ||
def _box_func(self): | ||
return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) | ||
|
||
@cache_readonly | ||
def dtype(self): | ||
return PeriodDtype.construct_from_string(self.freq) | ||
|
||
@property | ||
def _ndarray_values(self): | ||
# Ordinals | ||
return self._data | ||
|
||
@property | ||
def asi8(self): | ||
return self._ndarray_values.view('i8') |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from pandas._libs.tslib import Timedelta | ||
|
||
from pandas.core.dtypes.common import _TD_DTYPE | ||
|
||
from .datetimelike import DatetimeLikeArrayMixin | ||
|
||
|
||
class TimedeltaArrayMixin(DatetimeLikeArrayMixin): | ||
@property | ||
def _box_func(self): | ||
return lambda x: Timedelta(x, unit='ns') | ||
|
||
@property | ||
def dtype(self): | ||
return _TD_DTYPE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want this for our arrays?
I am not sure this should be called `.values`. And we already have `_ndarray_values`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good question, and I think it relates to the inheritance vs composition question. As with several other comments, the narrow-context answer is that this PR is designed to involve essentially zero changes to behavior; it is just moving methods/properties from their existing locations in the index classes.
As for questions about whether a method/property is needed in the array classes, these are all going to be needed by the arithmetic/comparison methods (ported in a later pass)