-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: implement DatetimeLikeArray #19902
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
df9f894
004f137
db72494
80b525e
54009d4
65dc829
4c5a05c
b91ddac
080e477
e19f70a
47d365e
3a67bce
7fc73eb
9edd9bc
1236273
1ab6263
9a28188
6b17031
b03689a
a055d40
d1faeb6
fcb8d6a
8cee92c
375329e
71dfe08
59c60a2
9db2b78
308c25b
94bdfcb
d589e2a
0d4f48a
1b910c7
cece116
828022a
ed83046
c1934db
a684c2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,8 +121,149 @@ def ceil(self, freq): | |
return self._round(freq, np.ceil) | ||
|
||
|
||
class DatetimeIndexOpsMixin(object): | ||
class DatetimeLikeArray(object): | ||
""" | ||
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray | ||
|
||
Assumes that __new__/__init__ defines: | ||
_data | ||
freq | ||
""" | ||
|
||
@property | ||
def _box_func(self): | ||
""" | ||
box function to get object from internal representation | ||
""" | ||
raise com.AbstractMethodError(self) | ||
|
||
def __iter__(self): | ||
return (self._box_func(v) for v in self.asi8) | ||
|
||
@property | ||
def values(self): | ||
""" return the underlying data as an ndarray """ | ||
return self._data.view(np.ndarray) | ||
|
||
@property | ||
def asi8(self): | ||
# do not cache or you'll create a memory leak | ||
return self.values.view('i8') | ||
|
||
# ------------------------------------------------------------------ | ||
# Null Handling | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you have a PR started that made something like a […]? Your PR probably did this, but ideally we would have a class attribute that indicates whether the class is immutable, and a single decorator for both. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yah, the PR you're thinking of did exactly that. At the time there was only one property/cache_readonly affected, but this was the motivation. It'll be easy to revive if it becomes necessary. |
||
def _isnan(self): | ||
""" return if each value is nan""" | ||
return (self.asi8 == iNaT) | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def hasnans(self): | ||
""" return if I have any nans; enables various perf speedups """ | ||
return self._isnan.any() | ||
|
||
def _maybe_mask_results(self, result, fill_value=None, convert=None): | ||
""" | ||
Parameters | ||
---------- | ||
result : a ndarray | ||
convert : string/dtype or None | ||
|
||
Returns | ||
------- | ||
result : ndarray with values replace by the fill_value | ||
|
||
mask the result if needed, convert to the provided dtype if its not | ||
None | ||
|
||
This is an internal routine | ||
""" | ||
|
||
if self.hasnans: | ||
if convert: | ||
result = result.astype(convert) | ||
if fill_value is None: | ||
fill_value = np.nan | ||
result[self._isnan] = fill_value | ||
return result | ||
|
||
# ------------------------------------------------------------------ | ||
|
||
@property | ||
def freqstr(self): | ||
""" | ||
Return the frequency object as a string if its set, otherwise None | ||
""" | ||
if self.freq is None: | ||
return None | ||
return self.freq.freqstr | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def inferred_freq(self): | ||
""" | ||
Tryies to return a string representing a frequency guess, | ||
generated by infer_freq. Returns None if it can't autodetect the | ||
frequency. | ||
""" | ||
try: | ||
return frequencies.infer_freq(self) | ||
except ValueError: | ||
return None | ||
|
||
# ------------------------------------------------------------------ | ||
# Arithmetic Methods | ||
|
||
def _add_datelike(self, other): | ||
raise TypeError("cannot add {cls} and {typ}" | ||
.format(cls=type(self).__name__, | ||
typ=type(other).__name__)) | ||
|
||
def _sub_datelike(self, other): | ||
raise com.AbstractMethodError(self) | ||
|
||
def _sub_period(self, other): | ||
return NotImplemented | ||
|
||
def _add_delta(self, other): | ||
return NotImplemented | ||
|
||
def _add_delta_td(self, other): | ||
""" | ||
Add a delta of a timedeltalike | ||
return the i8 result view | ||
""" | ||
inc = delta_to_nanoseconds(other) | ||
new_values = checked_add_with_arr(self.asi8, inc, | ||
arr_mask=self._isnan).view('i8') | ||
if self.hasnans: | ||
new_values[self._isnan] = iNaT | ||
return new_values.view('i8') | ||
|
||
def _add_delta_tdi(self, other): | ||
""" | ||
Add a delta of a TimedeltaIndex | ||
return the i8 result view | ||
""" | ||
if not len(self) == len(other): | ||
raise ValueError("cannot add indices of unequal length") | ||
|
||
self_i8 = self.asi8 | ||
other_i8 = other.asi8 | ||
new_values = checked_add_with_arr(self_i8, other_i8, | ||
arr_mask=self._isnan, | ||
b_mask=other._isnan) | ||
if self.hasnans or other.hasnans: | ||
mask = (self._isnan) | (other._isnan) | ||
new_values[mask] = iNaT | ||
return new_values.view('i8') | ||
|
||
|
||
class DatetimeIndexOpsMixin(DatetimeLikeArray): | ||
""" common ops mixin to support a unified interface datetimelike Index """ | ||
inferred_freq = cache_readonly(DatetimeLikeArray.inferred_freq.fget) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, this isn't so bad... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment here to note why we do it like this (array is mutable, index is immutable). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will do. |
||
_isnan = cache_readonly(DatetimeLikeArray._isnan.fget) | ||
hasnans = cache_readonly(DatetimeLikeArray.hasnans.fget) | ||
|
||
def equals(self, other): | ||
""" | ||
|
@@ -152,9 +293,6 @@ def equals(self, other): | |
|
||
return np.array_equal(self.asi8, other.asi8) | ||
|
||
def __iter__(self): | ||
return (self._box_func(v) for v in self.asi8) | ||
|
||
@staticmethod | ||
def _join_i8_wrapper(joinf, dtype, with_indexers=True): | ||
""" create the join wrapper methods """ | ||
|
@@ -229,13 +367,6 @@ def _ensure_localized(self, result): | |
result = result.tz_localize(self.tz) | ||
return result | ||
|
||
@property | ||
def _box_func(self): | ||
""" | ||
box function to get object from internal representation | ||
""" | ||
raise com.AbstractMethodError(self) | ||
|
||
def _box_values(self, values): | ||
""" | ||
apply box func to passed values | ||
|
@@ -311,27 +442,6 @@ def __getitem__(self, key): | |
|
||
return self._simple_new(result, **attribs) | ||
|
||
@property | ||
def freqstr(self): | ||
""" | ||
Return the frequency object as a string if its set, otherwise None | ||
""" | ||
if self.freq is None: | ||
return None | ||
return self.freq.freqstr | ||
|
||
@cache_readonly | ||
def inferred_freq(self): | ||
""" | ||
Tryies to return a string representing a frequency guess, | ||
generated by infer_freq. Returns None if it can't autodetect the | ||
frequency. | ||
""" | ||
try: | ||
return frequencies.infer_freq(self) | ||
except ValueError: | ||
return None | ||
|
||
def _nat_new(self, box=True): | ||
""" | ||
Return Index or ndarray filled with NaT which has the same | ||
|
@@ -424,11 +534,6 @@ def get_duplicates(self): | |
_na_value = NaT | ||
"""The expected NA value to use with this index.""" | ||
|
||
@cache_readonly | ||
def _isnan(self): | ||
""" return if each value is nan""" | ||
return (self.asi8 == iNaT) | ||
|
||
@property | ||
def asobject(self): | ||
"""Return object Index which contains boxed values. | ||
|
@@ -449,31 +554,6 @@ def _convert_tolerance(self, tolerance, target): | |
'target index size') | ||
return tolerance | ||
|
||
def _maybe_mask_results(self, result, fill_value=None, convert=None): | ||
""" | ||
Parameters | ||
---------- | ||
result : a ndarray | ||
convert : string/dtype or None | ||
|
||
Returns | ||
------- | ||
result : ndarray with values replace by the fill_value | ||
|
||
mask the result if needed, convert to the provided dtype if its not | ||
None | ||
|
||
This is an internal routine | ||
""" | ||
|
||
if self.hasnans: | ||
if convert: | ||
result = result.astype(convert) | ||
if fill_value is None: | ||
fill_value = np.nan | ||
result[self._isnan] = fill_value | ||
return result | ||
|
||
def tolist(self): | ||
""" | ||
return a list of the underlying data | ||
|
@@ -630,17 +710,6 @@ def _convert_scalar_indexer(self, key, kind=None): | |
return (super(DatetimeIndexOpsMixin, self) | ||
._convert_scalar_indexer(key, kind=kind)) | ||
|
||
def _add_datelike(self, other): | ||
raise TypeError("cannot add {0} and {1}" | ||
.format(type(self).__name__, | ||
type(other).__name__)) | ||
|
||
def _sub_datelike(self, other): | ||
raise com.AbstractMethodError(self) | ||
|
||
def _sub_period(self, other): | ||
return NotImplemented | ||
|
||
def _addsub_offset_array(self, other, op): | ||
""" | ||
Add or subtract array-like of DateOffset objects | ||
|
@@ -802,42 +871,6 @@ def __isub__(self, other): | |
return self.__sub__(other) | ||
cls.__isub__ = __isub__ | ||
|
||
def _add_delta(self, other): | ||
return NotImplemented | ||
|
||
def _add_delta_td(self, other): | ||
""" | ||
Add a delta of a timedeltalike | ||
return the i8 result view | ||
""" | ||
|
||
inc = delta_to_nanoseconds(other) | ||
new_values = checked_add_with_arr(self.asi8, inc, | ||
arr_mask=self._isnan).view('i8') | ||
if self.hasnans: | ||
new_values[self._isnan] = iNaT | ||
return new_values.view('i8') | ||
|
||
def _add_delta_tdi(self, other): | ||
""" | ||
Add a delta of a TimedeltaIndex | ||
return the i8 result view | ||
""" | ||
|
||
# delta operation | ||
if not len(self) == len(other): | ||
raise ValueError("cannot add indices of unequal length") | ||
|
||
self_i8 = self.asi8 | ||
other_i8 = other.asi8 | ||
new_values = checked_add_with_arr(self_i8, other_i8, | ||
arr_mask=self._isnan, | ||
b_mask=other._isnan) | ||
if self.hasnans or other.hasnans: | ||
mask = (self._isnan) | (other._isnan) | ||
new_values[mask] = iNaT | ||
return new_values.view('i8') | ||
|
||
def isin(self, values): | ||
""" | ||
Compute boolean array of whether each index value is found in the | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe append `Mixin` to the name here, to indicate that this still can't be constructed and used on its own?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For testing purposes I was planning to implement a bare-bones `__new__`. That actually raises an important question: what is the canonical attribute to assign the `values` input to? For DTI/TDI/PI right now it is `self._data`, but for the Block subclasses it's `self.values`. Has a convention been established for ExtensionArrays?
(none of which is mutually exclusive with `Mixin` being a good suggestion)