-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Fix incorrect DTI/TDI indexing; warn before dropping tzinfo #22549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
dd5cf53
4d34e14
d9cb515
5d2782a
a52af41
0c5e652
e9c3dd4
77abd10
acf7b6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
from __future__ import division | ||
import operator | ||
import warnings | ||
from datetime import time, datetime | ||
from datetime import time, datetime, timedelta | ||
|
||
import numpy as np | ||
from pytz import utc | ||
|
@@ -727,6 +727,10 @@ def to_period(self, freq=None): | |
""" | ||
from pandas.core.indexes.period import PeriodIndex | ||
|
||
if self.tz is not None: | ||
warnings.warn("Converting to PeriodIndex representation will " | ||
"drop timezone information.") | ||
|
||
if freq is None: | ||
freq = self.freqstr or self.inferred_freq | ||
|
||
|
@@ -737,7 +741,7 @@ def to_period(self, freq=None): | |
|
||
freq = get_period_alias(freq) | ||
|
||
return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz) | ||
return PeriodIndex(self.values, name=self.name, freq=freq) | ||
|
||
def snap(self, freq='S'): | ||
""" | ||
|
@@ -1201,6 +1205,12 @@ def get_loc(self, key, method=None, tolerance=None): | |
key = Timestamp(key, tz=self.tz) | ||
return Index.get_loc(self, key, method, tolerance) | ||
|
||
if isinstance(key, timedelta): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if/elif here? |
||
# GH#20464 | ||
raise TypeError("Cannot index {cls} with {other}" | ||
.format(cls=type(self).__name__, | ||
other=type(key).__name__)) | ||
|
||
if isinstance(key, time): | ||
if method is not None: | ||
raise NotImplementedError('cannot yet lookup inexact labels ' | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
""" implement the TimedeltaIndex """ | ||
import operator | ||
from datetime import datetime | ||
|
||
import numpy as np | ||
from pandas.core.dtypes.common import ( | ||
|
@@ -487,7 +488,8 @@ def get_loc(self, key, method=None, tolerance=None): | |
------- | ||
loc : int | ||
""" | ||
if is_list_like(key): | ||
if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure if we use isna else for NaT checking? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think in this context There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok this comment looks good |
||
# GH#20464 for datetime case | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make this more explicit, meaning datetime dtype for all-NaT |
||
raise TypeError | ||
|
||
if isna(key): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -246,15 +246,19 @@ def setup_method(self, method): | |
def test_to_period_millisecond(self): | ||
index = self.index | ||
|
||
period = index.to_period(freq='L') | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
period = index.to_period(freq='L') | ||
assert 2 == len(period) | ||
assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L') | ||
assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L') | ||
|
||
def test_to_period_microsecond(self): | ||
index = self.index | ||
|
||
period = index.to_period(freq='U') | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
period = index.to_period(freq='U') | ||
assert 2 == len(period) | ||
assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') | ||
assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') | ||
|
@@ -266,81 +270,95 @@ def test_to_period_tz_pytz(self): | |
|
||
ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=UTC) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=UTC) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
def test_to_period_tz_warning(self): | ||
# GH#21333 make sure a warning is issued when timezone | ||
# info is lost | ||
dti = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
dti.to_period() | ||
|
||
def test_to_period_tz_explicit_pytz(self): | ||
xp = date_range('1/1/2000', '4/1/2000').to_period() | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in future PR these should be parameterized |
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
def test_to_period_tz_dateutil(self): | ||
xp = date_range('1/1/2000', '4/1/2000').to_period() | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
with tm.assert_produces_warning(UserWarning): | ||
# warning that timezone info will be lost | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) | ||
|
||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
result = ts.to_period()[0] | ||
expected = ts[0].to_period() | ||
|
||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
assert result == expected | ||
tm.assert_index_equal(ts.to_period(), xp) | ||
|
||
def test_to_period_nofreq(self): | ||
idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,14 @@ def test_getitem(self): | |
tm.assert_index_equal(result, expected) | ||
assert result.freq == expected.freq | ||
|
||
def test_timestamp_invalid_key(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you parameterize over a datetime as well |
||
# GH#20464 | ||
tdi = pd.timedelta_range(0, periods=10) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we have a test that indexes with NaT? (both Timedelta and Datetime dtype) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have one for timedelta, just added one for datetime |
||
with pytest.raises(TypeError): | ||
tdi.get_loc(pd.Timestamp('1970-01-01')) | ||
with pytest.raises(TypeError): | ||
tdi.get_loc(pd.Timestamp('1970-01-02')) | ||
|
||
|
||
class TestWhere(object): | ||
# placeholder for symmetry with DatetimeIndex and PeriodIndex tests | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Warning type and stack level?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This question applies everywhere you placed warnings.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No idea what warning type to use. Suggestions?
As to stack level, I tried a bunch to get that to work with tm.assert_produces_warning and eventually threw in the towel.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As you have it, `UserWarning` makes sense, but I think being explicit about it is good.
I know what you mean regarding `stacklevel`. We generally try to get one that makes sense, and if the tests don't cooperate, we can just use `check_stacklevel=False`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the warnings are raised in direct user-api methods, normally putting a `stacklevel=2` should do the correct thing.
What did you not get working in the tests? (which kind of code sample)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIRC the problem is that the affected code paths issue both the new `FutureWarning` and in some cases also a `PerformanceWarning`. `tm.assert_produces_warning` doesn't support multiple expected warnings, and my attempt to modify it led to tests failing the stacklevel checks.