Skip to content

Commit 3bc2203

Browse files
authored
BUG: add date_format to read_csv / Date parsing mistake. read_csv (#50320)
* simplify
* fixup

Co-authored-by: MarcoGorelli <>
1 parent b0305f7 commit 3bc2203

File tree

5 files changed

+50
-45
lines changed

5 files changed

+50
-45
lines changed

pandas/_libs/tslibs/parsing.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def _does_string_look_like_datetime(py_string: str) -> bool: ...
2323
def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
2424
def try_parse_dates(
2525
values: npt.NDArray[np.object_], # object[:]
26-
parser=...,
26+
parser,
2727
dayfirst: bool = ...,
2828
default: datetime | None = ...,
2929
) -> npt.NDArray[np.object_]: ...

pandas/_libs/tslibs/parsing.pyx

+6-27
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,7 @@ cdef dateutil_parse(
661661

662662

663663
def try_parse_dates(
664-
object[:] values, parser=None, bint dayfirst=False, default=None,
664+
object[:] values, parser, bint dayfirst=False, default=None,
665665
) -> np.ndarray:
666666
cdef:
667667
Py_ssize_t i, n
@@ -670,32 +670,11 @@ def try_parse_dates(
670670
n = len(values)
671671
result = np.empty(n, dtype="O")
672672

673-
if parser is None:
674-
if default is None: # GH2618
675-
date = datetime.now()
676-
default = datetime(date.year, date.month, 1)
677-
678-
def parse_date(x):
679-
return du_parse(x, dayfirst=dayfirst, default=default)
680-
681-
# EAFP here
682-
try:
683-
for i in range(n):
684-
if values[i] == "":
685-
result[i] = np.nan
686-
else:
687-
result[i] = parse_date(values[i])
688-
except Exception:
689-
# Since parser is user-defined, we can't guess what it might raise
690-
return values
691-
else:
692-
parse_date = parser
693-
694-
for i in range(n):
695-
if values[i] == "":
696-
result[i] = np.nan
697-
else:
698-
result[i] = parse_date(values[i])
673+
for i in range(n):
674+
if values[i] == "":
675+
result[i] = np.nan
676+
else:
677+
result[i] = parser(values[i])
699678

700679
return result.base # .base to access underlying ndarray
701680

pandas/io/parsers/base_parser.py

+7-13
Original file line numberDiff line numberDiff line change
@@ -1121,19 +1121,13 @@ def converter(*date_cols):
11211121
if date_parser is None:
11221122
strs = parsing.concat_date_cols(date_cols)
11231123

1124-
try:
1125-
return tools.to_datetime(
1126-
ensure_object(strs),
1127-
utc=False,
1128-
dayfirst=dayfirst,
1129-
errors="ignore",
1130-
cache=cache_dates,
1131-
).to_numpy()
1132-
1133-
except ValueError:
1134-
return tools.to_datetime(
1135-
parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates
1136-
)
1124+
return tools.to_datetime(
1125+
ensure_object(strs),
1126+
utc=False,
1127+
dayfirst=dayfirst,
1128+
errors="ignore",
1129+
cache=cache_dates,
1130+
).to_numpy()
11371131
else:
11381132
try:
11391133
result = tools.to_datetime(

pandas/tests/io/parser/test_parse_dates.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,9 @@ def date_parser(*date_cols):
165165
-------
166166
parsed : Series
167167
"""
168-
return parsing.try_parse_dates(parsing.concat_date_cols(date_cols))
168+
return parsing.try_parse_dates(
169+
parsing.concat_date_cols(date_cols), parser=du_parse
170+
)
169171

170172
kwds = {
171173
"header": None,
@@ -2026,3 +2028,31 @@ def test_parse_dates_and_string_dtype(all_parsers):
20262028
expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]})
20272029
expected["a"] = expected["a"].astype("string")
20282030
tm.assert_frame_equal(result, expected)
2031+
2032+
2033+
def test_parse_dot_separated_dates(all_parsers):
2034+
# https://github.com/pandas-dev/pandas/issues/2586
2035+
parser = all_parsers
2036+
data = """a,b
2037+
27.03.2003 14:55:00.000,1
2038+
03.08.2003 15:20:00.000,2"""
2039+
if parser.engine == "pyarrow":
2040+
expected_index = Index(
2041+
["27.03.2003 14:55:00.000", "03.08.2003 15:20:00.000"],
2042+
dtype="object",
2043+
name="a",
2044+
)
2045+
warn = None
2046+
else:
2047+
expected_index = DatetimeIndex(
2048+
["2003-03-27 14:55:00", "2003-08-03 15:20:00"],
2049+
dtype="datetime64[ns]",
2050+
name="a",
2051+
)
2052+
warn = UserWarning
2053+
msg = "when dayfirst=False was specified"
2054+
result = parser.read_csv_check_warnings(
2055+
warn, msg, StringIO(data), parse_dates=True, index_col=0
2056+
)
2057+
expected = DataFrame({"b": [1, 2]}, index=expected_index)
2058+
tm.assert_frame_equal(result, expected)

pandas/tests/tslibs/test_parsing.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime
55
import re
66

7-
from dateutil.parser import parse
7+
from dateutil.parser import parse as du_parse
88
import numpy as np
99
import pytest
1010

@@ -271,9 +271,11 @@ def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning):
271271

272272
def test_try_parse_dates():
273273
arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object)
274-
result = parsing.try_parse_dates(arr, dayfirst=True)
274+
result = parsing.try_parse_dates(
275+
arr, dayfirst=True, parser=lambda x: du_parse(x, dayfirst=True)
276+
)
275277

276-
expected = np.array([parse(d, dayfirst=True) for d in arr])
278+
expected = np.array([du_parse(d, dayfirst=True) for d in arr])
277279
tm.assert_numpy_array_equal(result, expected)
278280

279281

0 commit comments

Comments
 (0)