Skip to content

Commit 9d29eea

Browse files
author
MarcoGorelli
committed
refactor and fix bug
1 parent 3a0db10 commit 9d29eea

File tree

3 files changed

+139
-129
lines changed

3 files changed

+139
-129
lines changed

Diff for: doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,7 @@ Datetimelike
824824
- Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`)
825825
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
826826
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
827+
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing a sequence of :class:`Timestamp` objects with mixed offsets and ``errors='ignore'`` (:issue:`50585`)
827828

828829
Timedelta
829830
^^^^^^^^^

Diff for: pandas/_libs/tslib.pyx

+114-124
Original file line numberDiff line numberDiff line change
@@ -505,144 +505,134 @@ cpdef array_to_datetime(
505505
result = np.empty(n, dtype="M8[ns]")
506506
iresult = result.view("i8")
507507

508-
try:
509-
for i in range(n):
510-
val = values[i]
511-
512-
try:
513-
if checknull_with_nat_and_na(val):
514-
iresult[i] = NPY_NAT
508+
for i in range(n):
509+
val = values[i]
515510

516-
elif PyDateTime_Check(val):
517-
if val.tzinfo is not None:
518-
found_tz = True
519-
else:
520-
found_naive = True
521-
tz_out = convert_timezone(
522-
val.tzinfo,
523-
tz_out,
524-
found_naive,
525-
found_tz,
526-
utc_convert,
527-
)
528-
result[i] = parse_pydatetime(val, &dts, utc_convert)
511+
try:
512+
if checknull_with_nat_and_na(val):
513+
iresult[i] = NPY_NAT
529514

530-
elif PyDate_Check(val):
531-
iresult[i] = pydate_to_dt64(val, &dts)
532-
check_dts_bounds(&dts)
515+
elif PyDateTime_Check(val):
516+
if val.tzinfo is not None:
517+
found_tz = True
518+
else:
519+
found_naive = True
520+
tz_out = convert_timezone(
521+
val.tzinfo,
522+
tz_out,
523+
found_naive,
524+
found_tz,
525+
utc_convert,
526+
)
527+
result[i] = parse_pydatetime(val, &dts, utc_convert)
528+
529+
elif PyDate_Check(val):
530+
iresult[i] = pydate_to_dt64(val, &dts)
531+
check_dts_bounds(&dts)
533532

534-
elif is_datetime64_object(val):
535-
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
533+
elif is_datetime64_object(val):
534+
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
536535

537-
elif is_integer_object(val) or is_float_object(val):
538-
# these must be ns unit by-definition
536+
elif is_integer_object(val) or is_float_object(val):
537+
# these must be ns unit by-definition
539538

540-
if val != val or val == NPY_NAT:
541-
iresult[i] = NPY_NAT
542-
elif is_raise or is_ignore:
543-
iresult[i] = val
544-
else:
545-
# coerce
546-
# we now need to parse this as if unit='ns'
547-
# we can ONLY accept integers at this point
548-
# if we have previously (or in future accept
549-
# datetimes/strings, then we must coerce)
550-
try:
551-
iresult[i] = cast_from_unit(val, "ns")
552-
except OverflowError:
553-
iresult[i] = NPY_NAT
554-
555-
elif isinstance(val, str):
556-
# string
557-
if type(val) is not str:
558-
# GH#32264 np.str_ object
559-
val = str(val)
560-
561-
if len(val) == 0 or val in nat_strings:
539+
if val != val or val == NPY_NAT:
540+
iresult[i] = NPY_NAT
541+
elif is_raise or is_ignore:
542+
iresult[i] = val
543+
else:
544+
# coerce
545+
# we now need to parse this as if unit='ns'
546+
# we can ONLY accept integers at this point
547+
# if we have previously (or in future accept
548+
# datetimes/strings, then we must coerce)
549+
try:
550+
iresult[i] = cast_from_unit(val, "ns")
551+
except OverflowError:
562552
iresult[i] = NPY_NAT
563-
continue
564553

565-
string_to_dts_failed = string_to_dts(
566-
val, &dts, &out_bestunit, &out_local,
567-
&out_tzoffset, False, None, False
568-
)
569-
if string_to_dts_failed:
570-
# An error at this point is a _parsing_ error
571-
# specifically _not_ OutOfBoundsDatetime
572-
if parse_today_now(val, &iresult[i], utc):
573-
continue
574-
575-
try:
576-
py_dt = parse_datetime_string(val,
577-
dayfirst=dayfirst,
578-
yearfirst=yearfirst)
579-
# If the dateutil parser returned tzinfo, capture it
580-
# to check if all arguments have the same tzinfo
581-
tz = py_dt.utcoffset()
582-
583-
except (ValueError, OverflowError):
584-
if is_coerce:
585-
iresult[i] = NPY_NAT
586-
continue
587-
raise TypeError(
588-
f"invalid string coercion to datetime "
589-
f"for \"{val}\", at position {i}"
590-
)
554+
elif isinstance(val, str):
555+
# string
556+
if type(val) is not str:
557+
# GH#32264 np.str_ object
558+
val = str(val)
591559

592-
if tz is not None:
593-
seen_datetime_offset = True
594-
# dateutil timezone objects cannot be hashed, so
595-
# store the UTC offsets in seconds instead
596-
out_tzoffset_vals.add(tz.total_seconds())
597-
else:
598-
# Add a marker for naive string, to track if we are
599-
# parsing mixed naive and aware strings
600-
out_tzoffset_vals.add("naive")
601-
602-
_ts = convert_datetime_to_tsobject(py_dt, None)
603-
iresult[i] = _ts.value
604-
if not string_to_dts_failed:
605-
# No error reported by string_to_dts, pick back up
606-
# where we left off
607-
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
608-
if out_local == 1:
609-
seen_datetime_offset = True
610-
# Store the out_tzoffset in seconds
611-
# since we store the total_seconds of
612-
# dateutil.tz.tzoffset objects
613-
out_tzoffset_vals.add(out_tzoffset * 60.)
614-
tz = timezone(timedelta(minutes=out_tzoffset))
615-
value = tz_localize_to_utc_single(value, tz)
616-
out_local = 0
617-
out_tzoffset = 0
618-
else:
619-
# Add a marker for naive string, to track if we are
620-
# parsing mixed naive and aware strings
621-
out_tzoffset_vals.add("naive")
622-
iresult[i] = value
623-
check_dts_bounds(&dts)
560+
if len(val) == 0 or val in nat_strings:
561+
iresult[i] = NPY_NAT
562+
continue
624563

625-
else:
626-
if is_coerce:
627-
iresult[i] = NPY_NAT
564+
string_to_dts_failed = string_to_dts(
565+
val, &dts, &out_bestunit, &out_local,
566+
&out_tzoffset, False, None, False
567+
)
568+
if string_to_dts_failed:
569+
# An error at this point is a _parsing_ error
570+
# specifically _not_ OutOfBoundsDatetime
571+
if parse_today_now(val, &iresult[i], utc):
572+
continue
573+
574+
py_dt = parse_datetime_string(val,
575+
dayfirst=dayfirst,
576+
yearfirst=yearfirst)
577+
# If the dateutil parser returned tzinfo, capture it
578+
# to check if all arguments have the same tzinfo
579+
tz = py_dt.utcoffset()
580+
581+
if tz is not None:
582+
seen_datetime_offset = True
583+
# dateutil timezone objects cannot be hashed, so
584+
# store the UTC offsets in seconds instead
585+
out_tzoffset_vals.add(tz.total_seconds())
628586
else:
629-
raise TypeError(f"{type(val)} is not convertible to datetime")
587+
# Add a marker for naive string, to track if we are
588+
# parsing mixed naive and aware strings
589+
out_tzoffset_vals.add("naive")
630590

631-
except OutOfBoundsDatetime as ex:
632-
ex.args = (f"{ex}, at position {i}",)
633-
if is_coerce:
634-
iresult[i] = NPY_NAT
635-
continue
636-
raise
591+
_ts = convert_datetime_to_tsobject(py_dt, None)
592+
iresult[i] = _ts.value
593+
else:
594+
# No error reported by string_to_dts, pick back up
595+
# where we left off
596+
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
597+
if out_local == 1:
598+
seen_datetime_offset = True
599+
# Store the out_tzoffset in seconds
600+
# since we store the total_seconds of
601+
# dateutil.tz.tzoffset objects
602+
out_tzoffset_vals.add(out_tzoffset * 60.)
603+
tz = timezone(timedelta(minutes=out_tzoffset))
604+
value = tz_localize_to_utc_single(value, tz)
605+
out_local = 0
606+
out_tzoffset = 0
607+
else:
608+
# Add a marker for naive string, to track if we are
609+
# parsing mixed naive and aware strings
610+
out_tzoffset_vals.add("naive")
611+
iresult[i] = value
612+
check_dts_bounds(&dts)
637613

638-
except OutOfBoundsDatetime:
639-
if is_raise:
640-
raise
614+
else:
615+
raise TypeError(f"{type(val)} is not convertible to datetime")
641616

642-
return ignore_errors_out_of_bounds_fallback(values), tz_out
617+
except (OutOfBoundsDatetime,) as ex:
618+
ex.args = (f"{ex}, at position {i}",)
619+
if is_coerce:
620+
iresult[i] = NPY_NAT
621+
continue
622+
elif is_raise:
623+
raise
624+
if isinstance(ex, OutOfBoundsDatetime):
625+
return ignore_errors_out_of_bounds_fallback(values), tz_out
626+
return values, None
643627

644-
except TypeError:
645-
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
628+
except (TypeError, OverflowError, ValueError) as ex:
629+
ex.args = (f"{ex}, at position {i}",)
630+
if is_coerce:
631+
iresult[i] = NPY_NAT
632+
continue
633+
elif is_raise:
634+
raise
635+
return values, None
646636

647637
if seen_datetime_offset and not utc_convert:
648638
# GH#17697

Diff for: pandas/tests/tools/test_to_datetime.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -1094,8 +1094,9 @@ def test_to_datetime_tz(self, cache):
10941094
)
10951095
tm.assert_index_equal(result, expected)
10961096

1097-
def test_to_datetime_tz_mixed_raises(self, cache):
1098-
# mixed tzs will raise
1097+
def test_to_datetime_tz_mixed(self, cache):
1098+
# mixed tzs will raise if errors='raise'
1099+
# https://github.com/pandas-dev/pandas/issues/50585
10991100
arr = [
11001101
Timestamp("2013-01-01 13:00:00", tz="US/Pacific"),
11011102
Timestamp("2013-01-02 14:00:00", tz="US/Eastern"),
@@ -1107,6 +1108,21 @@ def test_to_datetime_tz_mixed_raises(self, cache):
11071108
with pytest.raises(ValueError, match=msg):
11081109
to_datetime(arr, cache=cache)
11091110

1111+
result = to_datetime(arr, cache=cache, errors="ignore")
1112+
expected = Index(
1113+
[
1114+
Timestamp("2013-01-01 13:00:00-08:00"),
1115+
Timestamp("2013-01-02 14:00:00-05:00"),
1116+
],
1117+
dtype="object",
1118+
)
1119+
tm.assert_index_equal(result, expected)
1120+
result = to_datetime(arr, cache=cache, errors="coerce")
1121+
expected = DatetimeIndex(
1122+
["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]"
1123+
)
1124+
tm.assert_index_equal(result, expected)
1125+
11101126
def test_to_datetime_different_offsets(self, cache):
11111127
# inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
11121128
# see GH-26097 for more
@@ -1540,7 +1556,10 @@ def test_to_datetime_malformed_raise(self):
15401556
ts_strings = ["200622-12-31", "111111-24-11"]
15411557
with pytest.raises(
15421558
ValueError,
1543-
match=r"^hour must be in 0\.\.23: 111111-24-11, at position 1$",
1559+
match=(
1560+
r"^offset must be a timedelta strictly between "
1561+
r"-timedelta\(hours=24\) and timedelta\(hours=24\)., at position 0$"
1562+
),
15441563
):
15451564
with tm.assert_produces_warning(
15461565
UserWarning, match="Could not infer format"
@@ -2381,8 +2400,8 @@ def test_to_datetime_unprocessable_input(self, cache):
23812400

23822401
expected = Index(np.array([1, "1"], dtype="O"))
23832402
tm.assert_equal(result, expected)
2384-
msg = "invalid string coercion to datetime"
2385-
with pytest.raises(TypeError, match=msg):
2403+
msg = '^Given date string "1" not likely a datetime, at position 1$'
2404+
with pytest.raises(ValueError, match=msg):
23862405
to_datetime([1, "1"], errors="raise", cache=cache)
23872406

23882407
def test_to_datetime_unhashable_input(self, cache):

0 commit comments

Comments
 (0)