From d7fa43cd17a02c91485db7ee630081c1b164af83 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 22 Oct 2023 19:11:14 -0700
Subject: [PATCH 1/9] ENH: read_stata return non-nano

---
 doc/source/whatsnew/v2.2.0.rst |   1 +
 pandas/io/stata.py             | 144 ++++++++++++---------------------
 pandas/tests/io/test_stata.py  |  85 ++++++++++++-------
 3 files changed, 112 insertions(+), 118 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index d9ab0452c8334..fad58657d0310 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -350,6 +350,7 @@ Other enhancements
 - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`)
 - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`)
 - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`)
+- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`??`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.notable_bug_fixes:
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 576e27f202524..e5b7eeacc757c 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -62,7 +62,6 @@
     Timestamp,
     isna,
     to_datetime,
-    to_timedelta,
 )
 from pandas.core.frame import DataFrame
 from pandas.core.indexes.base import Index
@@ -232,6 +231,7 @@
 
 
 stata_epoch: Final = datetime(1960, 1, 1)
+unix_epoch: Final = datetime(1970, 1, 1)
 
 
 def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
@@ -280,64 +280,43 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
     date - ty
         years since 0000
     """
-    MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year
-    MAX_DAY_DELTA = (Timestamp.max - datetime(1960, 1, 1)).days
-    MIN_DAY_DELTA = (Timestamp.min - datetime(1960, 1, 1)).days
-    MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
-    MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000
 
-    def convert_year_month_safe(year, month) -> Series:
-        """
-        Convert year and month to datetimes, using pandas vectorized versions
-        when the date range falls within the range supported by pandas.
-        Otherwise it falls back to a slower but more robust method
-        using datetime.
-        """
-        if year.max() < MAX_YEAR and year.min() > MIN_YEAR:
-            return to_datetime(100 * year + month, format="%Y%m")
-        else:
-            index = getattr(year, "index", None)
-            return Series([datetime(y, m, 1) for y, m in zip(year, month)], index=index)
-
-    def convert_year_days_safe(year, days) -> Series:
-        """
-        Converts year (e.g. 1999) and days since the start of the year to a
-        datetime or datetime64 Series
-        """
-        if year.max() < (MAX_YEAR - 1) and year.min() > MIN_YEAR:
-            return to_datetime(year, format="%Y") + to_timedelta(days, unit="d")
-        else:
-            index = getattr(year, "index", None)
-            value = [
-                datetime(y, 1, 1) + timedelta(days=int(d)) for y, d in zip(year, days)
-            ]
-            return Series(value, index=index)
+    if fmt.startswith(("%tc", "tc")):
+        # Delta ms relative to base
+        td = np.timedelta64(stata_epoch - unix_epoch, "ms")
+        conv_dates = np.array(dates._values, dtype="M8[ms]") + td
+        return Series(conv_dates, index=dates.index)
 
-    def convert_delta_safe(base, deltas, unit) -> Series:
-        """
-        Convert base dates and deltas to datetimes, using pandas vectorized
-        versions if the deltas satisfy restrictions required to be expressed
-        as dates in pandas.
-        """
-        index = getattr(deltas, "index", None)
-        if unit == "d":
-            if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA:
-                values = [base + timedelta(days=int(d)) for d in deltas]
-                return Series(values, index=index)
-        elif unit == "ms":
-            if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA:
-                values = [
-                    base + timedelta(microseconds=(int(d) * 1000)) for d in deltas
-                ]
-                return Series(values, index=index)
-        else:
-            raise ValueError("format not understood")
-        base = to_datetime(base)
-        deltas = to_timedelta(deltas, unit=unit)
-        return base + deltas
+    elif fmt.startswith(("%td", "td", "%d", "d")):
+        # Delta days relative to base
+        td = np.timedelta64(stata_epoch - unix_epoch, "D")
+        conv_dates = np.array(dates._values, dtype="M8[D]") + td
+        return Series(conv_dates, index=dates.index)
+
+    elif fmt.startswith(("%tm", "tm")):
+        # Delta months relative to base
+        ordinals = dates + (stata_epoch.year - unix_epoch.year) * 12
+        res = np.array(ordinals, dtype="M8[M]").astype("M8[s]")
+        return Series(res, index=dates.index)
+
+    elif fmt.startswith(("%tq", "tq")):
+        # Delta quarters relative to base
+        ordinals = dates + (stata_epoch.year - unix_epoch.year) * 4
+        res = np.array(ordinals, dtype="M8[3M]").astype("M8[s]")
+        return Series(res, index=dates.index)
+
+    elif fmt.startswith(("%th", "th")):
+        # Delta half-years relative to base
+        ordinals = dates + (stata_epoch.year - unix_epoch.year) * 2
+        res = np.array(ordinals, dtype="M8[6M]").astype("M8[s]")
+        return Series(res, index=dates.index)
+
+    elif fmt.startswith(("%ty", "ty")):
+        # Years -- not delta
+        ordinals = dates - 1970
+        res = np.array(ordinals, dtype="M8[Y]").astype("M8[s]")
+        return Series(res, index=dates.index)
 
-    # TODO(non-nano): If/when pandas supports more than datetime64[ns], this
-    #  should be improved to use correct range, e.g. datetime[Y] for yearly
     bad_locs = np.isnan(dates)
     has_bad_values = False
     if bad_locs.any():
@@ -345,11 +324,7 @@ def convert_delta_safe(base, deltas, unit) -> Series:
         dates._values[bad_locs] = 1.0  # Replace with NaT
     dates = dates.astype(np.int64)
 
-    if fmt.startswith(("%tc", "tc")):  # Delta ms relative to base
-        base = stata_epoch
-        ms = dates
-        conv_dates = convert_delta_safe(base, ms, "ms")
-    elif fmt.startswith(("%tC", "tC")):
+    if fmt.startswith(("%tC", "tC")):
         warnings.warn(
             "Encountered %tC format. Leaving in Stata Internal Format.",
             stacklevel=find_stack_level(),
@@ -358,33 +333,18 @@ def convert_delta_safe(base, deltas, unit) -> Series:
         if has_bad_values:
             conv_dates[bad_locs] = NaT
         return conv_dates
-    # Delta days relative to base
-    elif fmt.startswith(("%td", "td", "%d", "d")):
-        base = stata_epoch
-        days = dates
-        conv_dates = convert_delta_safe(base, days, "d")
     # does not count leap days - 7 days is a week.
     # 52nd week may have more than 7 days
     elif fmt.startswith(("%tw", "tw")):
         year = stata_epoch.year + dates // 52
         days = (dates % 52) * 7
-        conv_dates = convert_year_days_safe(year, days)
-    elif fmt.startswith(("%tm", "tm")):  # Delta months relative to base
-        year = stata_epoch.year + dates // 12
-        month = (dates % 12) + 1
-        conv_dates = convert_year_month_safe(year, month)
-    elif fmt.startswith(("%tq", "tq")):  # Delta quarters relative to base
-        year = stata_epoch.year + dates // 4
-        quarter_month = (dates % 4) * 3 + 1
-        conv_dates = convert_year_month_safe(year, quarter_month)
-    elif fmt.startswith(("%th", "th")):  # Delta half-years relative to base
-        year = stata_epoch.year + dates // 2
-        month = (dates % 2) * 6 + 1
-        conv_dates = convert_year_month_safe(year, month)
-    elif fmt.startswith(("%ty", "ty")):  # Years -- not delta
-        year = dates
-        first_month = np.ones_like(dates)
-        conv_dates = convert_year_month_safe(year, first_month)
+        per_y = (year - 1970).array.view("Period[Y]")
+        per_d = per_y.asfreq("D", how="S")
+        per_d_shifted = per_d + days
+        per_s = per_d_shifted.dt.asfreq("s", how="S")
+        conv_dates_arr = per_s.array.view("M8[s]")
+        conv_dates = Series(conv_dates_arr, index=per_s.index, name=per_s.name)
+
     else:
         raise ValueError(f"Date fmt {fmt} not understood")
 
@@ -409,6 +369,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
     index = dates.index
     NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000
     US_PER_DAY = NS_PER_DAY / 1000
+    MS_PER_DAY = NS_PER_DAY / 1_000_000
 
     def parse_dates_safe(
         dates: Series, delta: bool = False, year: bool = False, days: bool = False
@@ -416,17 +377,18 @@ def parse_dates_safe(
         d = {}
         if lib.is_np_dtype(dates.dtype, "M"):
             if delta:
-                time_delta = dates - Timestamp(stata_epoch).as_unit("ns")
-                d["delta"] = time_delta._values.view(np.int64) // 1000  # microseconds
+                time_delta = dates.dt.as_unit("ms") - Timestamp(stata_epoch).as_unit(
+                    "ms"
+                )
+                d["delta"] = time_delta._values.view(np.int64)
             if days or year:
                 date_index = DatetimeIndex(dates)
                 d["year"] = date_index._data.year
                 d["month"] = date_index._data.month
             if days:
-                days_in_ns = dates._values.view(np.int64) - to_datetime(
-                    d["year"], format="%Y"
-                )._values.view(np.int64)
-                d["days"] = days_in_ns // NS_PER_DAY
+                year_start = np.asarray(dates).astype("M8[Y]").astype(dates.dtype)
+                diff = dates - year_start
+                d["days"] = np.asarray(diff).astype("m8[D]").view("int64")
 
         elif infer_dtype(dates, skipna=False) == "datetime":
             if delta:
@@ -466,7 +428,7 @@ def g(x: datetime) -> int:
 
     if fmt in ["%tc", "tc"]:
         d = parse_dates_safe(dates, delta=True)
-        conv_dates = d.delta / 1000
+        conv_dates = d.delta
     elif fmt in ["%tC", "tC"]:
         warnings.warn(
             "Stata Internal Format tC not supported.",
@@ -475,7 +437,7 @@ def g(x: datetime) -> int:
         conv_dates = dates
     elif fmt in ["%td", "td"]:
         d = parse_dates_safe(dates, delta=True)
-        conv_dates = d.delta // US_PER_DAY
+        conv_dates = d.delta // MS_PER_DAY
     elif fmt in ["%tw", "tw"]:
         d = parse_dates_safe(dates, year=True, days=True)
         conv_dates = 52 * (d.year - stata_epoch.year) + d.days // 7
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 5c6377349304c..4d81cae6db2bf 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -174,7 +174,16 @@ def test_read_dta2(self, datapath):
                 "yearly_date",
             ],
         )
-        expected["yearly_date"] = expected["yearly_date"].astype("O")
+        # TODO(GH#55564): just pass M8[s] to the constructor
+        expected["datetime_c"] = expected["datetime_c"].astype("M8[ms]")
+        expected["date"] = expected["date"].astype("M8[s]")
+        expected["weekly_date"] = expected["weekly_date"].astype("M8[s]")
+        expected["monthly_date"] = expected["monthly_date"].astype("M8[s]")
+        expected["quarterly_date"] = expected["quarterly_date"].astype("M8[s]")
+        expected["half_yearly_date"] = expected["half_yearly_date"].astype("M8[s]")
+        expected["yearly_date"] = (
+            expected["yearly_date"].astype("Period[s]").array.view("M8[s]")
+        )
 
         path1 = datapath("io", "data", "stata", "stata2_114.dta")
         path2 = datapath("io", "data", "stata", "stata2_115.dta")
@@ -360,12 +369,15 @@ def test_read_write_dta10(self, version):
         with tm.ensure_clean() as path:
             original.to_stata(path, convert_dates={"datetime": "tc"}, version=version)
             written_and_read_again = self.read_dta(path)
-            # original.index is np.int32, read index is np.int64
-            tm.assert_frame_equal(
-                written_and_read_again.set_index("index"),
-                original,
-                check_index_type=False,
-            )
+
+        expected = original[:]
+        # "tc" convert_dates means we store in ms
+        expected["datetime"] = expected["datetime"].astype("M8[ms]")
+
+        tm.assert_frame_equal(
+            written_and_read_again.set_index("index"),
+            expected,
+        )
 
     def test_stata_doc_examples(self):
         with tm.ensure_clean() as path:
@@ -514,9 +526,10 @@ def test_read_write_reread_dta15(self, file, datapath):
         expected["long_"] = expected["long_"].astype(np.int32)
         expected["float_"] = expected["float_"].astype(np.float32)
         expected["double_"] = expected["double_"].astype(np.float64)
-        expected["date_td"] = expected["date_td"].apply(
-            datetime.strptime, args=("%Y-%m-%d",)
-        )
+
+        # TODO(GH#55564): directly cast to M8[s]
+        arr = expected["date_td"].astype("Period[D]")._values.asfreq("s", how="S")
+        expected["date_td"] = arr.view("M8[s]")
 
         file = datapath("io", "data", "stata", f"{file}.dta")
         parsed = self.read_dta(file)
@@ -636,10 +649,11 @@ def test_dates_invalid_column(self):
 
             written_and_read_again = self.read_dta(path)
 
-        modified = original
-        modified.columns = ["_0"]
-        modified.index = original.index.astype(np.int32)
-        tm.assert_frame_equal(written_and_read_again.set_index("index"), modified)
+        expected = original.copy()
+        expected.columns = ["_0"]
+        expected.index = original.index.astype(np.int32)
+        expected["_0"] = expected["_0"].astype("M8[ms]")
+        tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
     def test_105(self, datapath):
         # Data obtained from:
@@ -684,7 +698,9 @@ def test_date_export_formats(self):
             [expected_values],
             index=pd.Index([0], dtype=np.int32, name="index"),
             columns=columns,
+            dtype="M8[s]",
         )
+        expected["tc"] = expected["tc"].astype("M8[ms]")
 
         with tm.ensure_clean() as path:
             original.to_stata(path, convert_dates=conversions)
@@ -881,6 +897,15 @@ def test_big_dates(self, datapath):
         expected[5][5] = expected[5][6] = datetime(1678, 1, 1)
 
         expected = DataFrame(expected, columns=columns, dtype=object)
+        # FIXME(GH#55564): can't astype directly to ms or s
+        expected["date_tc"] = expected["date_tc"].astype("Period[ms]")._values.view("M8[ms]")
+        expected["date_td"] = expected["date_td"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_tm"] = expected["date_tm"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_tw"] = expected["date_tw"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_tq"] = expected["date_tq"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_th"] = expected["date_th"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_ty"] = expected["date_ty"].astype("Period[s]")._values.view("M8[s]")
+
         parsed_115 = read_stata(datapath("io", "data", "stata", "stata9_115.dta"))
         parsed_117 = read_stata(datapath("io", "data", "stata", "stata9_117.dta"))
         tm.assert_frame_equal(expected, parsed_115, check_datetimelike_compat=True)
@@ -906,9 +931,9 @@ def test_dtype_conversion(self, datapath):
         expected["long_"] = expected["long_"].astype(np.int32)
         expected["float_"] = expected["float_"].astype(np.float32)
         expected["double_"] = expected["double_"].astype(np.float64)
-        expected["date_td"] = expected["date_td"].apply(
-            datetime.strptime, args=("%Y-%m-%d",)
-        )
+        # FIXME(GH#55564): can't astype directly to M8[ms] without OutOfBoundsDatetime
+        parr = expected["date_td"].astype("Period[D]")._values
+        expected["date_td"] = parr.view("M8[D]").astype("M8[s]")
 
         no_conversion = read_stata(
             datapath("io", "data", "stata", "stata6_117.dta"), convert_dates=True
@@ -922,12 +947,10 @@ def test_dtype_conversion(self, datapath):
         )
 
         # read_csv types are the same
-        expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv"))
-        expected["date_td"] = expected["date_td"].apply(
-            datetime.strptime, args=("%Y-%m-%d",)
-        )
+        expected2 = self.read_csv(datapath("io", "data", "stata", "stata6.csv"))
+        expected2["date_td"] = expected["date_td"]
 
-        tm.assert_frame_equal(expected, conversion)
+        tm.assert_frame_equal(expected2, conversion)
 
     def test_drop_column(self, datapath):
         expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv"))
@@ -1392,10 +1415,14 @@ def test_default_date_conversion(self):
             }
         )
 
+        expected = original[:]
+        # "tc" for convert_dates below stores with "ms" resolution
+        expected["dates"] = expected["dates"].astype("M8[ms]")
+
         with tm.ensure_clean() as path:
             original.to_stata(path, write_index=False)
             reread = read_stata(path, convert_dates=True)
-            tm.assert_frame_equal(original, reread)
+            tm.assert_frame_equal(expected, reread)
 
             original.to_stata(path, write_index=False, convert_dates={"dates": "tc"})
             direct = read_stata(path, convert_dates=True)
@@ -1666,11 +1693,14 @@ def test_writer_117(self):
                 version=117,
             )
             written_and_read_again = self.read_dta(path)
-        # original.index is np.int32, read index is np.int64
+
+        expected = original[:]
+        # "tc" for convert_dates means we store with "ms" resolution
+        expected["datetime"] = expected["datetime"].astype("M8[ms]")
+
         tm.assert_frame_equal(
             written_and_read_again.set_index("index"),
-            original,
-            check_index_type=False,
+            expected,
         )
         tm.assert_frame_equal(original, copy)
 
@@ -1943,7 +1973,8 @@ def test_read_write_ea_dtypes(self, dtype_backend):
                 "b": ["a", "b", "c"],
                 "c": [1.0, 0, np.nan],
                 "d": [1.5, 2.5, 3.5],
-                "e": pd.date_range("2020-12-31", periods=3, freq="D"),
+                # Stata stores with ms unit, so the unit does not round-trip exactly
+                "e": pd.date_range("2020-12-31", periods=3, freq="D", unit="ms"),
             },
             index=pd.Index([0, 1, 2], name="index", dtype=np.int32),
         )

From fec9cc2eb7445426b260a2ec67ba3ff3609851f2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 22 Oct 2023 19:13:23 -0700
Subject: [PATCH 2/9] GH ref

---
 doc/source/whatsnew/v2.2.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index fad58657d0310..aabb1718e60b2 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -339,6 +339,7 @@ Other enhancements
 - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs` (:issue:`54264`)
 - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
 - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
+- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
 - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`)

From f9598b9cfaeb647df80e627e4d7763e249ab88f3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 23 Oct 2023 09:01:09 -0700
Subject: [PATCH 3/9] mypy fixup

---
 pandas/io/stata.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index e5b7eeacc757c..d734610ed822f 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -284,14 +284,14 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
     if fmt.startswith(("%tc", "tc")):
         # Delta ms relative to base
         td = np.timedelta64(stata_epoch - unix_epoch, "ms")
-        conv_dates = np.array(dates._values, dtype="M8[ms]") + td
-        return Series(conv_dates, index=dates.index)
+        res = np.array(dates._values, dtype="M8[ms]") + td
+        return Series(res, index=dates.index)
 
     elif fmt.startswith(("%td", "td", "%d", "d")):
         # Delta days relative to base
         td = np.timedelta64(stata_epoch - unix_epoch, "D")
-        conv_dates = np.array(dates._values, dtype="M8[D]") + td
-        return Series(conv_dates, index=dates.index)
+        res = np.array(dates._values, dtype="M8[D]") + td
+        return Series(res, index=dates.index)
 
     elif fmt.startswith(("%tm", "tm")):
         # Delta months relative to base

From dcf0c228732067aefeed3ace746ae38eb64b7b0a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 23 Oct 2023 11:52:30 -0700
Subject: [PATCH 4/9] update doctest

---
 pandas/io/stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index d734610ed822f..dd3b6ed41657e 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -256,7 +256,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
     >>> dates = pd.Series([52])
     >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw")
     0   1961-01-01
-    dtype: datetime64[ns]
+    dtype: datetime64[s]
 
     Notes
     -----

From fa3aad7dc6c671212ee2635063916a34ac4366ef Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 13 Nov 2023 13:57:23 -0800
Subject: [PATCH 5/9] simplify

---
 pandas/tests/io/test_stata.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 4d81cae6db2bf..a9001ab9a9fa9 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -897,14 +897,13 @@ def test_big_dates(self, datapath):
         expected[5][5] = expected[5][6] = datetime(1678, 1, 1)
 
         expected = DataFrame(expected, columns=columns, dtype=object)
-        # FIXME(GH#55564): can't astype directly to ms or s
-        expected["date_tc"] = expected["date_tc"].astype("Period[ms]")._values.view("M8[ms]")
-        expected["date_td"] = expected["date_td"].astype("Period[s]")._values.view("M8[s]")
-        expected["date_tm"] = expected["date_tm"].astype("Period[s]")._values.view("M8[s]")
-        expected["date_tw"] = expected["date_tw"].astype("Period[s]")._values.view("M8[s]")
-        expected["date_tq"] = expected["date_tq"].astype("Period[s]")._values.view("M8[s]")
-        expected["date_th"] = expected["date_th"].astype("Period[s]")._values.view("M8[s]")
-        expected["date_ty"] = expected["date_ty"].astype("Period[s]")._values.view("M8[s]")
+        expected["date_tc"] = expected["date_tc"].astype("M8[ms]")
+        expected["date_td"] = expected["date_td"].astype("M8[s]")
+        expected["date_tm"] = expected["date_tm"].astype("M8[s]")
+        expected["date_tw"] = expected["date_tw"].astype("M8[s]")
+        expected["date_tq"] = expected["date_tq"].astype("M8[s]")
+        expected["date_th"] = expected["date_th"].astype("M8[s]")
+        expected["date_ty"] = expected["date_ty"].astype("M8[s]")
 
         parsed_115 = read_stata(datapath("io", "data", "stata", "stata9_115.dta"))
         parsed_117 = read_stata(datapath("io", "data", "stata", "stata9_117.dta"))
@@ -931,9 +930,7 @@ def test_dtype_conversion(self, datapath):
         expected["long_"] = expected["long_"].astype(np.int32)
         expected["float_"] = expected["float_"].astype(np.float32)
         expected["double_"] = expected["double_"].astype(np.float64)
-        # FIXME(GH#55564): can't astype directly to M8[ms] without OutOfBoundsDatetime
-        parr = expected["date_td"].astype("Period[D]")._values
-        expected["date_td"] = parr.view("M8[D]").astype("M8[s]")
+        expected["date_td"] = expected["date_td"].astype("M8[s]")
 
         no_conversion = read_stata(
             datapath("io", "data", "stata", "stata6_117.dta"), convert_dates=True

From 640aaf9c3fb9e3188b46bfa625025a168527a90f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 30 Nov 2023 16:29:23 -0800
Subject: [PATCH 6/9] avoid Series.view

---
 pandas/io/stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index dd3b6ed41657e..8b72aad2eff9b 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -343,7 +343,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
         per_d_shifted = per_d + days
         per_s = per_d_shifted.dt.asfreq("s", how="S")
         conv_dates_arr = per_s.array.view("M8[s]")
-        conv_dates = Series(conv_dates_arr, index=per_s.index, name=per_s.name)
+        conv_dates = Series(conv_dates_arr, index=dates.index)
 
     else:
         raise ValueError(f"Date fmt {fmt} not understood")

From 2f538c7d75623d5a9f55fcd9ecbbf05cb166ec38 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 1 Dec 2023 08:32:45 -0800
Subject: [PATCH 7/9] don't go through Series

---
 pandas/io/stata.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 8b72aad2eff9b..abebb4e2d1663 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -340,9 +340,9 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
         days = (dates % 52) * 7
         per_y = (year - 1970).array.view("Period[Y]")
         per_d = per_y.asfreq("D", how="S")
-        per_d_shifted = per_d + days
-        per_s = per_d_shifted.dt.asfreq("s", how="S")
-        conv_dates_arr = per_s.array.view("M8[s]")
+        per_d_shifted = per_d + days._values
+        per_s = per_d_shifted.asfreq("s", how="S")
+        conv_dates_arr = per_s.view("M8[s]")
         conv_dates = Series(conv_dates_arr, index=dates.index)
 
     else:

From 7b802c7c16d8b76dff17c9413c8aa5896afcd561 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 24 Jan 2024 10:27:46 -0800
Subject: [PATCH 8/9] move whatsnew

---
 doc/source/whatsnew/v2.2.0.rst | 1 -
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index aabb1718e60b2..fad58657d0310 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -339,7 +339,6 @@ Other enhancements
 - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs` (:issue:`54264`)
 - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
 - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
-- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
 - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
 - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 950082f9281c5..1bf1e70597e38 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -28,7 +28,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
--
+- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the Stata format (:issue:`55642`)
 -
 
 .. ---------------------------------------------------------------------------

From ea261aa8b1af8e20e5d8084c02fd3d8ca4bc9eb0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 25 Jan 2024 18:55:38 -0800
Subject: [PATCH 9/9] remove outdated whatsnew

---
 doc/source/whatsnew/v2.2.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index fad58657d0310..d9ab0452c8334 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -350,7 +350,6 @@ Other enhancements
 - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`)
 - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`)
 - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`)
-- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`??`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.notable_bug_fixes: