Skip to content

Commit d26e4e6

Browse files
author
MarcoGorelli
committed
refactor double-try-except and fix bug
1 parent 3a0db10 commit d26e4e6

File tree

3 files changed

+141
-171
lines changed

3 files changed

+141
-171
lines changed

Diff for: doc/source/whatsnew/v2.0.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,8 @@ Datetimelike
823823
- Bug in :func:`to_datetime` was throwing ``ValueError`` when parsing dates with ISO8601 format where some values were not zero-padded (:issue:`21422`)
824824
- Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`)
825825
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
826-
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
826+
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
827+
- Bug in :func:`to_datetime` was not returning the input when parsing out-of-bounds ``np.datetime64`` with ``errors='ignore'`` (:issue:`50587`)
827828

828829
Timedelta
829830
^^^^^^^^^

Diff for: pandas/_libs/tslib.pyx

+118-164
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ from pandas._libs.tslibs.np_datetime cimport (
3030
NPY_DATETIMEUNIT,
3131
NPY_FR_ns,
3232
check_dts_bounds,
33-
get_datetime64_value,
3433
npy_datetimestruct,
3534
npy_datetimestruct_to_datetime,
3635
pandas_datetime_to_datetimestruct,
@@ -505,144 +504,139 @@ cpdef array_to_datetime(
505504
result = np.empty(n, dtype="M8[ns]")
506505
iresult = result.view("i8")
507506

508-
try:
509-
for i in range(n):
510-
val = values[i]
511-
512-
try:
513-
if checknull_with_nat_and_na(val):
514-
iresult[i] = NPY_NAT
507+
for i in range(n):
508+
val = values[i]
515509

516-
elif PyDateTime_Check(val):
517-
if val.tzinfo is not None:
518-
found_tz = True
519-
else:
520-
found_naive = True
521-
tz_out = convert_timezone(
522-
val.tzinfo,
523-
tz_out,
524-
found_naive,
525-
found_tz,
526-
utc_convert,
527-
)
528-
result[i] = parse_pydatetime(val, &dts, utc_convert)
510+
try:
511+
if checknull_with_nat_and_na(val):
512+
iresult[i] = NPY_NAT
529513

530-
elif PyDate_Check(val):
531-
iresult[i] = pydate_to_dt64(val, &dts)
532-
check_dts_bounds(&dts)
514+
elif PyDateTime_Check(val):
515+
if val.tzinfo is not None:
516+
found_tz = True
517+
else:
518+
found_naive = True
519+
tz_out = convert_timezone(
520+
val.tzinfo,
521+
tz_out,
522+
found_naive,
523+
found_tz,
524+
utc_convert,
525+
)
526+
result[i] = parse_pydatetime(val, &dts, utc_convert)
527+
528+
elif PyDate_Check(val):
529+
iresult[i] = pydate_to_dt64(val, &dts)
530+
check_dts_bounds(&dts)
533531

534-
elif is_datetime64_object(val):
535-
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
532+
elif is_datetime64_object(val):
533+
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
536534

537-
elif is_integer_object(val) or is_float_object(val):
538-
# these must be ns unit by-definition
535+
elif is_integer_object(val) or is_float_object(val):
536+
# these must be ns unit by-definition
539537

540-
if val != val or val == NPY_NAT:
538+
if val != val or val == NPY_NAT:
539+
iresult[i] = NPY_NAT
540+
elif is_raise or is_ignore:
541+
iresult[i] = val
542+
else:
543+
# coerce
544+
# we now need to parse this as if unit='ns'
545+
# we can ONLY accept integers at this point
546+
# if we have previously (or in future) accepted
547+
# datetimes/strings, then we must coerce
548+
try:
549+
iresult[i] = cast_from_unit(val, "ns")
550+
except OverflowError:
541551
iresult[i] = NPY_NAT
542-
elif is_raise or is_ignore:
543-
iresult[i] = val
544-
else:
545-
# coerce
546-
# we now need to parse this as if unit='ns'
547-
# we can ONLY accept integers at this point
548-
# if we have previously (or in future) accepted
548-
# datetimes/strings, then we must coerce
550-
try:
551-
iresult[i] = cast_from_unit(val, "ns")
552-
except OverflowError:
553-
iresult[i] = NPY_NAT
554552

555-
elif isinstance(val, str):
556-
# string
557-
if type(val) is not str:
558-
# GH#32264 np.str_ object
559-
val = str(val)
553+
elif isinstance(val, str):
554+
# string
555+
if type(val) is not str:
556+
# GH#32264 np.str_ object
557+
val = str(val)
560558

561-
if len(val) == 0 or val in nat_strings:
562-
iresult[i] = NPY_NAT
559+
if len(val) == 0 or val in nat_strings:
560+
iresult[i] = NPY_NAT
561+
continue
562+
563+
string_to_dts_failed = string_to_dts(
564+
val, &dts, &out_bestunit, &out_local,
565+
&out_tzoffset, False, None, False
566+
)
567+
if string_to_dts_failed:
568+
# An error at this point is a _parsing_ error
569+
# specifically _not_ OutOfBoundsDatetime
570+
if parse_today_now(val, &iresult[i], utc):
563571
continue
564572

565-
string_to_dts_failed = string_to_dts(
566-
val, &dts, &out_bestunit, &out_local,
567-
&out_tzoffset, False, None, False
568-
)
569-
if string_to_dts_failed:
570-
# An error at this point is a _parsing_ error
571-
# specifically _not_ OutOfBoundsDatetime
572-
if parse_today_now(val, &iresult[i], utc):
573+
try:
574+
py_dt = parse_datetime_string(val,
575+
dayfirst=dayfirst,
576+
yearfirst=yearfirst)
577+
# If the dateutil parser returned tzinfo, capture it
578+
# to check if all arguments have the same tzinfo
579+
tz = py_dt.utcoffset()
580+
581+
except (ValueError, OverflowError):
582+
if is_coerce:
583+
iresult[i] = NPY_NAT
573584
continue
574-
575-
try:
576-
py_dt = parse_datetime_string(val,
577-
dayfirst=dayfirst,
578-
yearfirst=yearfirst)
579-
# If the dateutil parser returned tzinfo, capture it
580-
# to check if all arguments have the same tzinfo
581-
tz = py_dt.utcoffset()
582-
583-
except (ValueError, OverflowError):
584-
if is_coerce:
585-
iresult[i] = NPY_NAT
586-
continue
587-
raise TypeError(
588-
f"invalid string coercion to datetime "
589-
f"for \"{val}\", at position {i}"
590-
)
591-
592-
if tz is not None:
593-
seen_datetime_offset = True
594-
# dateutil timezone objects cannot be hashed, so
595-
# store the UTC offsets in seconds instead
596-
out_tzoffset_vals.add(tz.total_seconds())
597-
else:
598-
# Add a marker for naive string, to track if we are
599-
# parsing mixed naive and aware strings
600-
out_tzoffset_vals.add("naive")
601-
602-
_ts = convert_datetime_to_tsobject(py_dt, None)
603-
iresult[i] = _ts.value
604-
if not string_to_dts_failed:
605-
# No error reported by string_to_dts, pick back up
606-
# where we left off
607-
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
608-
if out_local == 1:
609-
seen_datetime_offset = True
610-
# Store the out_tzoffset in seconds
611-
# since we store the total_seconds of
612-
# dateutil.tz.tzoffset objects
613-
out_tzoffset_vals.add(out_tzoffset * 60.)
614-
tz = timezone(timedelta(minutes=out_tzoffset))
615-
value = tz_localize_to_utc_single(value, tz)
616-
out_local = 0
617-
out_tzoffset = 0
618-
else:
619-
# Add a marker for naive string, to track if we are
620-
# parsing mixed naive and aware strings
621-
out_tzoffset_vals.add("naive")
622-
iresult[i] = value
623-
check_dts_bounds(&dts)
624-
625-
else:
626-
if is_coerce:
627-
iresult[i] = NPY_NAT
585+
raise TypeError(
586+
f"invalid string coercion to datetime "
587+
f"for \"{val}\", at position {i}"
588+
)
589+
590+
if tz is not None:
591+
seen_datetime_offset = True
592+
# dateutil timezone objects cannot be hashed, so
593+
# store the UTC offsets in seconds instead
594+
out_tzoffset_vals.add(tz.total_seconds())
628595
else:
629-
raise TypeError(f"{type(val)} is not convertible to datetime")
630-
631-
except OutOfBoundsDatetime as ex:
632-
ex.args = (f"{ex}, at position {i}",)
633-
if is_coerce:
634-
iresult[i] = NPY_NAT
635-
continue
636-
raise
596+
# Add a marker for naive string, to track if we are
597+
# parsing mixed naive and aware strings
598+
out_tzoffset_vals.add("naive")
599+
600+
_ts = convert_datetime_to_tsobject(py_dt, None)
601+
iresult[i] = _ts.value
602+
if not string_to_dts_failed:
603+
# No error reported by string_to_dts, pick back up
604+
# where we left off
605+
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
606+
if out_local == 1:
607+
seen_datetime_offset = True
608+
# Store the out_tzoffset in seconds
609+
# since we store the total_seconds of
610+
# dateutil.tz.tzoffset objects
611+
out_tzoffset_vals.add(out_tzoffset * 60.)
612+
tz = timezone(timedelta(minutes=out_tzoffset))
613+
value = tz_localize_to_utc_single(value, tz)
614+
out_local = 0
615+
out_tzoffset = 0
616+
else:
617+
# Add a marker for naive string, to track if we are
618+
# parsing mixed naive and aware strings
619+
out_tzoffset_vals.add("naive")
620+
iresult[i] = value
621+
check_dts_bounds(&dts)
637622

638-
except OutOfBoundsDatetime:
639-
if is_raise:
640-
raise
623+
else:
624+
raise TypeError(f"{type(val)} is not convertible to datetime")
641625

642-
return ignore_errors_out_of_bounds_fallback(values), tz_out
626+
except (OutOfBoundsDatetime, ValueError) as ex:
627+
ex.args = (f"{ex}, at position {i}",)
628+
if is_coerce:
629+
iresult[i] = NPY_NAT
630+
continue
631+
elif is_raise:
632+
raise
633+
return values, None
643634

644-
except TypeError:
645-
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
635+
except TypeError:
636+
if is_coerce:
637+
iresult[i] = NPY_NAT
638+
continue
639+
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
646640

647641
if seen_datetime_offset and not utc_convert:
648642
# GH#17697
@@ -660,46 +654,6 @@ cpdef array_to_datetime(
660654
return result, tz_out
661655

662656

663-
@cython.wraparound(False)
664-
@cython.boundscheck(False)
665-
cdef ndarray[object] ignore_errors_out_of_bounds_fallback(ndarray[object] values):
666-
"""
667-
Fallback for array_to_datetime if an OutOfBoundsDatetime is raised
668-
and errors == "ignore"
669-
670-
Parameters
671-
----------
672-
values : ndarray[object]
673-
674-
Returns
675-
-------
676-
ndarray[object]
677-
"""
678-
cdef:
679-
Py_ssize_t i, n = len(values)
680-
object val
681-
682-
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
683-
684-
for i in range(n):
685-
val = values[i]
686-
687-
# set as nan except if it's a NaT
688-
if checknull_with_nat_and_na(val):
689-
if isinstance(val, float):
690-
oresult[i] = np.nan
691-
else:
692-
oresult[i] = NaT
693-
elif is_datetime64_object(val):
694-
if get_datetime64_value(val) == NPY_NAT:
695-
oresult[i] = NaT
696-
else:
697-
oresult[i] = val.item()
698-
else:
699-
oresult[i] = val
700-
return oresult
701-
702-
703657
@cython.wraparound(False)
704658
@cython.boundscheck(False)
705659
cdef _array_to_datetime_object(

Diff for: pandas/tests/tools/test_to_datetime.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
10471047
def test_to_datetime_array_of_dt64s(self, cache, unit):
10481048
# https://github.com/pandas-dev/pandas/issues/31491
10491049
# Need at least 50 to ensure cache is used.
1050+
# https://github.com/pandas-dev/pandas/issues/50587
10501051
dts = [
10511052
np.datetime64("2000-01-01", unit),
10521053
np.datetime64("2000-01-02", unit),
@@ -1072,12 +1073,10 @@ def test_to_datetime_array_of_dt64s(self, cache, unit):
10721073
),
10731074
)
10741075

1075-
# With errors='ignore', out of bounds datetime64s
1076-
# are converted to their .item(), which depending on the version of
1077-
# numpy is either a python datetime.datetime or datetime.date
1076+
# With errors='ignore', the input is returned
10781077
tm.assert_index_equal(
10791078
to_datetime(dts_with_oob, errors="ignore", cache=cache),
1080-
Index([dt.item() for dt in dts_with_oob]),
1079+
Index(dts_with_oob),
10811080
)
10821081

10831082
def test_to_datetime_tz(self, cache):
@@ -1094,8 +1093,9 @@ def test_to_datetime_tz(self, cache):
10941093
)
10951094
tm.assert_index_equal(result, expected)
10961095

1097-
def test_to_datetime_tz_mixed_raises(self, cache):
1098-
# mixed tzs will raise
1096+
def test_to_datetime_tz_mixed(self, cache):
1097+
# mixed tzs will raise if errors='raise'
1098+
# https://github.com/pandas-dev/pandas/issues/50585
10991099
arr = [
11001100
Timestamp("2013-01-01 13:00:00", tz="US/Pacific"),
11011101
Timestamp("2013-01-02 14:00:00", tz="US/Eastern"),
@@ -1107,6 +1107,21 @@ def test_to_datetime_tz_mixed_raises(self, cache):
11071107
with pytest.raises(ValueError, match=msg):
11081108
to_datetime(arr, cache=cache)
11091109

1110+
result = to_datetime(arr, cache=cache, errors="ignore")
1111+
expected = Index(
1112+
[
1113+
Timestamp("2013-01-01 13:00:00-08:00"),
1114+
Timestamp("2013-01-02 14:00:00-05:00"),
1115+
],
1116+
dtype="object",
1117+
)
1118+
tm.assert_index_equal(result, expected)
1119+
result = to_datetime(arr, cache=cache, errors="coerce")
1120+
expected = DatetimeIndex(
1121+
["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]"
1122+
)
1123+
tm.assert_index_equal(result, expected)
1124+
11101125
def test_to_datetime_different_offsets(self, cache):
11111126
# inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
11121127
# see GH-26097 for more

0 commit comments

Comments
 (0)