diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 57dd1d05a274e..ad44078d051f6 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -242,6 +242,7 @@ I/O
 - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
 - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
 - Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
+- Bug in :class:`Datetime64Formatter` that caused error on string representation with extension types of datetime64 values and ndim > 1 (:issue:`38390`)
 -
 
 Period
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 527ee51873631..cebce0229da6b 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -1514,18 +1514,25 @@ def __init__(
 
     def _format_strings(self) -> List[str]:
         """ we by definition have DO NOT have a TZ """
-        values = self.values
-
-        if not isinstance(values, DatetimeIndex):
-            values = DatetimeIndex(values)
+        values = np.asarray(self.values)
+        flat_values = values.ravel() if values.ndim > 1 else values
+        flat_values = DatetimeArray(flat_values)
 
         if self.formatter is not None and callable(self.formatter):
-            return [self.formatter(x) for x in values]
+            fmt_values = [self.formatter(x) for x in flat_values]
+        else:
+            fmt_values = flat_values._format_native_types(
+                na_rep=self.nat_rep, date_format=self.date_format
+            )
 
-        fmt_values = values._data._format_native_types(
-            na_rep=self.nat_rep, date_format=self.date_format
-        )
-        return fmt_values.tolist()
+        if values.ndim > 1:
+            fmt_values = np.asarray(fmt_values).reshape(values.shape)
+            nested_formatter = GenericArrayFormatter(fmt_values)
+            fmt_values = nested_formatter.get_result()
+        elif isinstance(fmt_values, np.ndarray):
+            fmt_values = fmt_values.tolist()
+
+        return fmt_values
 
 
 class ExtensionArrayFormatter(GenericArrayFormatter):
@@ -1700,11 +1707,19 @@ class Datetime64TZFormatter(Datetime64Formatter):
     def _format_strings(self) -> List[str]:
         """ we by definition have a TZ """
         values = self.values.astype(object)
-        ido = is_dates_only(values)
+        flat_values = values.ravel() if values.ndim > 1 else values
+
+        ido = is_dates_only(flat_values)
         formatter = self.formatter or get_format_datetime64(
             ido, date_format=self.date_format
         )
-        fmt_values = [formatter(x) for x in values]
+
+        fmt_values = [formatter(x) for x in flat_values]
+
+        if values.ndim > 1:
+            fmt_values = np.asarray(fmt_values).reshape(values.shape)
+            nested_formatter = GenericArrayFormatter(fmt_values)
+            fmt_values = nested_formatter.get_result()
 
         return fmt_values
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index fe85849c6dcca..42256333bd3a1 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -3106,6 +3106,94 @@ def format_func(x):
         result = formatter.get_result()
         assert result == ["10:10", "12:12"]
 
+    def test_datetime64formatter_2d_array(self):
+        x = pd.date_range("2018-01-01", periods=10, freq="H").to_numpy()
+
+        formatter = fmt.Datetime64Formatter(x.reshape((5, 2)))
+        result = formatter.get_result()
+        assert len(result) == 5
+        assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00]"
+        assert result[4].strip() == "[2018-01-01 08:00:00, 2018-01-01 09:00:00]"
+
+        formatter = fmt.Datetime64Formatter(x.reshape((2, 5)))
+        result = formatter.get_result()
+        assert len(result) == 2
+        assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00, 201..."
+        assert result[1].strip() == "[2018-01-01 05:00:00, 2018-01-01 06:00:00, 201..."
+
+    def test_datetime64formatter_3d_array(self):
+        x = pd.date_range("2018-01-01", periods=10, freq="H").to_numpy()
+
+        formatter = fmt.Datetime64Formatter(x.reshape((10, 1, 1)))
+        result = formatter.get_result()
+        assert len(result) == 10
+        assert result[0].strip() == "[[2018-01-01 00:00:00]]"
+        assert result[9].strip() == "[[2018-01-01 09:00:00]]"
+
+    def test_datetime64formatter_2d_array_format_func(self):
+        x = pd.date_range("2018-01-01", periods=24, freq="H").to_numpy()
+
+        def format_func(t):
+            return t.strftime("%H-%m")
+
+        formatter = fmt.Datetime64Formatter(x.reshape((4, 2, 3)), formatter=format_func)
+        result = formatter.get_result()
+        assert len(result) == 4
+        assert result[0].strip() == "[[00-01, 01-01, 02-01], [03-01, 04-01, 05-01]]"
+        assert result[3].strip() == "[[18-01, 19-01, 20-01], [21-01, 22-01, 23-01]]"
+
+
+class TestDatetime64TZFormatter:
+    def test_mixed(self):
+        utc = dateutil.tz.tzutc()
+        x = Series(
+            [
+                datetime(2013, 1, 1, tzinfo=utc),
+                datetime(2013, 1, 1, 12, tzinfo=utc),
+                pd.NaT,
+            ]
+        )
+        result = fmt.Datetime64TZFormatter(x).get_result()
+        assert len(result) == 3
+        assert result[0].strip() == "2013-01-01 00:00:00+00:00"
+        assert result[1].strip() == "2013-01-01 12:00:00+00:00"
+        assert result[2].strip() == "NaT"
+
+    def test_datetime64formatter_1d_array(self):
+        x = pd.date_range("2018-01-01", periods=3, freq="H", tz="US/Pacific").to_numpy()
+        formatter = fmt.Datetime64TZFormatter(x)
+        result = formatter.get_result()
+        assert len(result) == 3
+        assert result[0].strip() == "2018-01-01 00:00:00-08:00"
+        assert result[1].strip() == "2018-01-01 01:00:00-08:00"
+        assert result[2].strip() == "2018-01-01 02:00:00-08:00"
+
+    def test_datetime64formatter_2d_array(self):
+        x = pd.date_range(
+            "2018-01-01", periods=10, freq="H", tz="US/Pacific"
+        ).to_numpy()
+        formatter = fmt.Datetime64TZFormatter(x.reshape((5, 2)))
+        result = formatter.get_result()
+        assert len(result) == 5
+        assert result[0].strip() == "[2018-01-01 00:00:00-08:00, 2018-01-01 01:00:0..."
+        assert result[4].strip() == "[2018-01-01 08:00:00-08:00, 2018-01-01 09:00:0..."
+
+    def test_datetime64formatter_2d_array_format_func(self):
+        x = pd.date_range(
+            "2018-01-01", periods=16, freq="H", tz="US/Pacific"
+        ).to_numpy()
+
+        def format_func(t):
+            return t.strftime("%H-%m %Z")
+
+        formatter = fmt.Datetime64TZFormatter(
+            x.reshape((4, 2, 2)), formatter=format_func
+        )
+        result = formatter.get_result()
+        assert len(result) == 4
+        assert result[0].strip() == "[[00-01 PST, 01-01 PST], [02-01 PST, 03-01 PST]]"
+        assert result[3].strip() == "[[12-01 PST, 13-01 PST], [14-01 PST, 15-01 PST]]"
+
 
 class TestNaTFormatting:
     def test_repr(self):