Skip to content

Commit 8bea992

Browse files
committed
Merge remote-tracking branch 'upstream/master' into read_csv-doc
* upstream/master: BUG: Fix+test timezone-preservation in DTA.repeat (pandas-dev#24483) Implement reductions from pandas-dev#24024 (pandas-dev#24484)
2 parents e8a3050 + 1ebfd8a commit 8bea992

File tree

6 files changed

+193
-1
lines changed

6 files changed

+193
-1
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1367,6 +1367,7 @@ Datetimelike
13671367
- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`)
13681368
- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`)
13691369
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
1370+
- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`)
13701371

13711372
Timedelta
13721373
^^^^^^^^^

pandas/core/arrays/datetimelike.py

+67-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
2828
from pandas.core.dtypes.missing import isna
2929

30+
from pandas.core import nanops
3031
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
3132
import pandas.core.common as com
3233

@@ -698,7 +699,7 @@ def repeat(self, repeats, *args, **kwargs):
698699
"""
699700
nv.validate_repeat(args, kwargs)
700701
values = self._data.repeat(repeats)
701-
return type(self)(values, dtype=self.dtype)
702+
return type(self)(values.view('i8'), dtype=self.dtype)
702703

703704
# ------------------------------------------------------------------
704705
# Null Handling
@@ -1381,6 +1382,71 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
13811382
)
13821383
return arg
13831384

1385+
# --------------------------------------------------------------
1386+
# Reductions
1387+
1388+
def _reduce(self, name, axis=0, skipna=True, **kwargs):
    """
    Dispatch a named reduction to the method of the same name on ``self``.

    Parameters
    ----------
    name : str
        Name of the reduction; looked up as an attribute of ``self``.
    axis : int, default 0
        Passed through to the reduction method.
    skipna : bool, default True
        Passed through to the reduction method.
    **kwargs
        Additional keyword arguments passed through to the reduction
        method.

    Returns
    -------
    object
        Whatever the looked-up reduction method returns.

    Raises
    ------
    TypeError
        If ``self`` has no callable attribute called ``name``.
    """
    op = getattr(self, name, None)
    # Require a callable rather than a merely truthy attribute: a plain
    # truthiness test would attempt to call non-method attributes (giving
    # an unrelated "object is not callable" error) and can itself raise
    # when the attribute is array-valued.
    if not callable(op):
        # TODO: use super(DatetimeLikeArrayMixin, self)._reduce
        #  after we subclass ExtensionArray
        raise TypeError("cannot perform {name} with type {dtype}"
                        .format(name=name, dtype=self.dtype))
    return op(axis=axis, skipna=skipna, **kwargs)
1397+
1398+
def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the smallest value held by the Array.

    Parameters
    ----------
    axis : optional
        Only checked by ``nv.validate_minmax_axis``; not otherwise used.
    skipna : bool, default True
        Forwarded to ``nanops.nanmin`` together with the null mask.
    *args, **kwargs
        Only checked by ``nv.validate_min``.

    Returns
    -------
    scalar
        ``self._box_func`` applied to the reduced i8 value, or ``NaT``
        when the reduced value is null.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    i8values = self.asi8
    na_mask = self.isna()
    smallest = nanops.nanmin(i8values, skipna=skipna, mask=na_mask)

    # Period._from_ordinal does not handle np.nan gracefully, so a null
    # reduction result is never handed to the boxing function.
    return NaT if isna(smallest) else self._box_func(smallest)
1417+
1418+
def max(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the largest value held by the Array.

    Parameters
    ----------
    axis : optional
        Only checked by ``nv.validate_minmax_axis``; not otherwise used.
    skipna : bool, default True
        If True, null entries are dropped before reducing. If False and
        any entry is null, ``NaT`` is returned.
    *args, **kwargs
        Only checked by ``nv.validate_max``.

    Returns
    -------
    scalar
        ``self._box_func`` applied to the reduced i8 value, or ``NaT``
        when there is nothing left to reduce.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    # TODO: skipna is broken with max.
    # See https://github.com/pandas-dev/pandas/issues/24265
    nv.validate_max(args, kwargs)
    nv.validate_minmax_axis(axis)

    na_mask = self.isna()
    if not skipna and na_mask.any():
        # Nulls are present and the caller asked not to skip them.
        return NaT

    i8values = self[~na_mask].asi8 if skipna else self.asi8
    if len(i8values) == 0:
        # short-circuit for empty max / min
        return NaT

    # No NA went in, so the reduced value never needs a null check.
    return self._box_func(nanops.nanmax(i8values, skipna=skipna))
1449+
13841450

13851451
DatetimeLikeArrayMixin._add_comparison_ops()
13861452

pandas/tests/arrays/test_datetimelike.py

+7
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ def test_scalar_from_string(self):
157157
result = arr._scalar_from_string(str(arr[0]))
158158
assert result == arr[0]
159159

160+
def test_reduce_invalid(self):
    """An unknown reduction name makes ``_reduce`` raise TypeError."""
    # Ten consecutive daily values expressed as i8 nanoseconds.
    nanos_per_day = 24 * 3600 * 10**9
    i8values = np.arange(10, dtype='i8') * nanos_per_day
    arr = self.array_cls(i8values, freq='D')

    with pytest.raises(TypeError, match='cannot perform'):
        arr._reduce("not a method")
166+
160167
def test_searchsorted(self):
161168
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
162169
arr = self.array_cls(data, freq='D')

pandas/tests/arrays/test_datetimes.py

+48
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ def test_setitem_clears_freq(self):
113113
a[0] = pd.Timestamp("2000", tz="US/Central")
114114
assert a.freq is None
115115

116+
def test_repeat_preserves_tz(self):
    """Repeating a tz-aware array keeps its tz and values."""
    arr = DatetimeArray(
        pd.date_range('2000', periods=2, freq='D', tz='US/Central')
    )

    result = arr.repeat([1, 1])

    # preserves tz and values, but not freq
    expected = DatetimeArray(arr.asi8, freq=None, tz=arr.tz)
    tm.assert_equal(result, expected)
125+
116126

117127
class TestSequenceToDT64NS(object):
118128

@@ -126,3 +136,41 @@ def test_tz_dtype_matches(self):
126136
result, _, _ = sequence_to_dt64ns(
127137
arr, dtype=DatetimeTZDtype(tz="US/Central"))
128138
tm.assert_numpy_array_equal(arr._data, result)
139+
140+
141+
class TestReductions(object):
    """min / max reductions on DatetimeArray, tz-naive and tz-aware."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_min_max(self, tz):
        # Unsorted dates with a single NaT mixed in.
        dates = [
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ]
        arr = DatetimeArray._from_sequence(dates, tz=tz)

        # Default skipna=True ignores the NaT entry.
        assert arr.min() == pd.Timestamp('2000-01-02', tz=tz)
        assert arr.max() == pd.Timestamp('2000-01-05', tz=tz)

        # With skipna=False the NaT entry wins.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna, tz):
        # An empty array reduces to NaT regardless of skipna.
        empty = DatetimeArray._from_sequence([], tz=tz)

        assert empty.min(skipna=skipna) is pd.NaT
        assert empty.max(skipna=skipna) is pd.NaT

pandas/tests/arrays/test_period.py

+39
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,42 @@ def test_repr_large():
261261
"Length: 1000, dtype: period[D]"
262262
)
263263
assert result == expected
264+
265+
266+
# ----------------------------------------------------------------------------
267+
# Reductions
268+
269+
class TestReductions(object):
    """min / max reductions on PeriodArray."""

    def test_min_max(self):
        # Unsorted daily periods with a single NaT mixed in.
        periods = [
            '2000-01-03',
            '2000-01-03',
            'NaT',
            '2000-01-02',
            '2000-01-05',
            '2000-01-04',
        ]
        arr = period_array(periods, freq='D')

        # Default skipna=True ignores the NaT entry.
        assert arr.min() == pd.Period('2000-01-02', freq='D')
        assert arr.max() == pd.Period('2000-01-05', freq='D')

        # With skipna=False the NaT entry wins.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT regardless of skipna.
        empty = period_array([], freq='D')

        assert empty.min(skipna=skipna) is pd.NaT
        assert empty.max(skipna=skipna) is pd.NaT

pandas/tests/arrays/test_timedeltas.py

+31
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,34 @@ def test_setitem_clears_freq(self):
9393
a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H'))
9494
a[0] = pd.Timedelta("1H")
9595
assert a.freq is None
96+
97+
98+
class TestReductions(object):
    """min / max reductions on TimedeltaArray."""

    def test_min_max(self):
        # Unsorted hourly offsets with a single NaT mixed in.
        offsets = ['3H', '3H', 'NaT', '2H', '5H', '4H']
        arr = TimedeltaArray._from_sequence(offsets)

        # Default skipna=True ignores the NaT entry.
        assert arr.min() == pd.Timedelta('2H')
        assert arr.max() == pd.Timedelta('5H')

        # With skipna=False the NaT entry wins.
        assert arr.min(skipna=False) is pd.NaT
        assert arr.max(skipna=False) is pd.NaT

    @pytest.mark.parametrize('skipna', [True, False])
    def test_min_max_empty(self, skipna):
        # An empty array reduces to NaT regardless of skipna.
        empty = TimedeltaArray._from_sequence([])

        assert empty.min(skipna=skipna) is pd.NaT
        assert empty.max(skipna=skipna) is pd.NaT

0 commit comments

Comments
 (0)