Skip to content

Commit f2cf915

Browse files
committed
Fix parsing corner case (closes pandas-dev#19382)
1 parent 3f3b4e0 commit f2cf915

File tree

4 files changed

+59
-11
lines changed

4 files changed

+59
-11
lines changed

pandas/_libs/tslib.pyx

+31-10
Original file line numberDiff line numberDiff line change
@@ -609,20 +609,26 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
609609
value = tz_convert_single(value, tz, 'UTC')
610610
iresult[i] = value
611611
check_dts_bounds(&dts)
612+
except OutOfBoundsDatetime:
613+
# GH#19382 for just-barely-OutOfBounds falling back to
614+
# dateutil parser will return incorrect result because
615+
# it will ignore nanoseconds
616+
if require_iso8601:
617+
if _handle_error_require_iso8601(val, &iresult[i],
618+
is_coerce, is_raise):
619+
continue
620+
return values
621+
elif is_coerce:
622+
iresult[i] = NPY_NAT
623+
continue
624+
raise
612625
except ValueError:
613626
# if requiring iso8601 strings, skip trying other formats
614627
if require_iso8601:
615-
if _parse_today_now(val, &iresult[i]):
628+
if _handle_error_require_iso8601(val, &iresult[i],
629+
is_coerce, is_raise):
616630
continue
617-
if is_coerce:
618-
iresult[i] = NPY_NAT
619-
continue
620-
elif is_raise:
621-
raise ValueError(
622-
"time data %r doesn't match format "
623-
"specified" % (val,))
624-
else:
625-
return values
631+
return values
626632

627633
try:
628634
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
@@ -725,6 +731,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
725731
return oresult
726732

727733

734+
cdef bint _handle_error_require_iso8601(object val, int64_t* iresult,
735+
bint is_coerce,
736+
bint is_raise) except? -1:
737+
# Return True to continue, False to return values, or raise
738+
if _parse_today_now(val, iresult):
739+
return True
740+
elif is_coerce:
741+
iresult[0] = NPY_NAT
742+
return True
743+
elif is_raise:
744+
raise ValueError("time data {val} doesn't match format "
745+
"specified".format(val=val))
746+
return False
747+
748+
728749
cdef inline bint _parse_today_now(str val, int64_t* iresult):
729750
# We delay this check for as long as possible
730751
# because it catches relatively rare cases

pandas/_libs/tslibs/conversion.pyx

+8
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
2626
dt64_to_dtstruct, dtstruct_to_dt64,
2727
get_datetime64_unit, get_datetime64_value,
2828
pydatetime_to_dt64)
29+
from np_datetime import OutOfBoundsDatetime
2930

3031
from util cimport (is_string_object,
3132
is_datetime64_object,
@@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
472473
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
473474
ambiguous='raise',
474475
errors='raise')[0]
476+
477+
except OutOfBoundsDatetime:
478+
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
479+
# parser will return incorrect result because it will ignore
480+
# nanoseconds
481+
raise
482+
475483
except ValueError:
476484
try:
477485
ts = parse_datetime_string(ts, dayfirst=dayfirst,

pandas/tests/indexes/datetimes/test_tools.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,6 @@ def test_dataframe_dtypes(self, cache):
783783

784784

785785
class TestToDatetimeMisc(object):
786-
787786
@pytest.mark.parametrize('cache', [True, False])
788787
def test_to_datetime_iso8601(self, cache):
789788
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
@@ -1596,6 +1595,19 @@ def test_coerce_of_invalid_datetimes(self):
15961595
)
15971596
)
15981597

1598+
def test_to_datetime_barely_out_of_bounds(self):
1599+
# GH#19382 close enough to bounds that dropping nanos would result
1600+
# in an in-bounds datetime
1601+
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
1602+
1603+
with pytest.raises(tslib.OutOfBoundsDatetime):
1604+
to_datetime(arr)
1605+
1606+
with pytest.raises(tslib.OutOfBoundsDatetime):
1607+
# Essentially the same as above, but more directly calling
1608+
# the relevant function
1609+
tslib.array_to_datetime(arr)
1610+
15991611

16001612
def test_normalize_date():
16011613
value = date(2012, 9, 7)

pandas/tests/scalar/test_timestamp.py

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from pandas.tseries import offsets
1717

18+
from pandas._libs.tslib import OutOfBoundsDatetime
1819
from pandas._libs.tslibs import conversion
1920
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz
2021

@@ -410,6 +411,12 @@ def test_out_of_bounds_string(self):
410411
with pytest.raises(ValueError):
411412
Timestamp('2263-01-01')
412413

414+
def test_barely_out_of_bounds(self):
415+
# GH#19382 close enough to bounds that dropping nanos would result
416+
# in an in-bounds datetime
417+
with pytest.raises(OutOfBoundsDatetime):
418+
Timestamp('2262-04-11 23:47:16.854775808')
419+
413420
def test_bounds_with_different_units(self):
414421
out_of_bounds_dates = ('1677-09-21', '2262-04-12')
415422

0 commit comments

Comments
 (0)