From de6aefe9031e68a0ca42730cff67779f1c674c57 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:37:56 +0000 Subject: [PATCH 1/2] clean: remove outdated perioddtype comment --- pandas/_libs/tslibs/period.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index bef1956996b4f..97d624e802c31 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1752,9 +1752,6 @@ cdef class _Period(PeriodMixin): def __cinit__(self, int64_t ordinal, BaseOffset freq): self.ordinal = ordinal self.freq = freq - # Note: this is more performant than PeriodDtype.from_date_offset(freq) - # because from_date_offset cannot be made a cdef method (until cython - # supported cdef classmethods) self._dtype = PeriodDtypeBase(freq._period_dtype_code, freq.n) @classmethod From f0f6bcf1f3c7c42cf2b27a0bddb3284dc8819f4b Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:45:06 +0000 Subject: [PATCH 2/2] drive-by walruses --- pandas/core/arrays/categorical.py | 3 +-- pandas/core/base.py | 3 +-- pandas/core/computation/engines.py | 3 +-- pandas/core/dtypes/common.py | 3 +-- pandas/core/dtypes/dtypes.py | 6 ++---- pandas/core/groupby/grouper.py | 6 ++---- pandas/core/groupby/ops.py | 3 +-- pandas/core/indexes/base.py | 3 +-- pandas/core/interchange/from_dataframe.py | 9 +++------ pandas/core/strings/object_array.py | 3 +-- pandas/core/window/rolling.py | 3 +-- 11 files changed, 15 insertions(+), 30 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0ce700772fdcc..0fb57fab7a90c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2974,8 +2974,7 @@ def _delegate_method(self, name: str, *args, **kwargs): from pandas import Series method = getattr(self._parent, name) - res = method(*args, 
**kwargs) - if res is not None: + if (res := method(*args, **kwargs)) is not None: return Series(res, index=self._index, name=self._name) diff --git a/pandas/core/base.py b/pandas/core/base.py index a64cd8633c1db..e2947b98d0954 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -126,8 +126,7 @@ def __sizeof__(self) -> int: Generates the total memory usage for an object that returns either a value or Series of values """ - memory_usage = getattr(self, "memory_usage", None) - if memory_usage: + if (memory_usage := getattr(self, "memory_usage", None)): mem = memory_usage(deep=True) return int(mem if is_scalar(mem) else mem.sum()) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index d2a181cbb3c36..5e118f111df97 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -36,9 +36,8 @@ def _check_ne_builtin_clash(expr: Expr) -> None: Terms can contain """ names = expr.names - overlap = names & _ne_builtins - if overlap: + if (overlap := names & _ne_builtins): s = ", ".join([repr(x) for x in overlap]) raise NumExprClobberingError( f'Variables in expression "{expr}" overlap with builtins: ({s})' diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e92f2363b69f1..7387a90932cc8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1819,8 +1819,7 @@ def pandas_dtype(dtype) -> DtypeObj: return StringDtype(na_value=np.nan) # registered extension types - result = registry.find(dtype) - if result is not None: + if (result := registry.find(dtype)) is not None: if isinstance(result, type): # GH 31356, GH 54592 warnings.warn( diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index a02a8b8b110bf..be5ea3ee3fc12 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -889,8 +889,7 @@ def construct_from_string(cls, string: str_type) -> DatetimeTZDtype: ) msg = f"Cannot construct a 'DatetimeTZDtype' from 
'{string}'" - match = cls._match.match(string) - if match: + if (match := cls._match.match(string)): d = match.groupdict() try: return cls(unit=d["unit"], tz=d["tz"]) @@ -1999,9 +1998,8 @@ def _parse_subtype(dtype: str) -> tuple[str, bool]: When the subtype cannot be extracted. """ xpr = re.compile(r"Sparse\[(?P<subtype>[^,]*)(, )?(?P<fill_value>.*?)?\]$") - m = xpr.match(dtype) has_fill_value = False - if m: + if (m := xpr.match(dtype)): subtype = m.groupdict()["subtype"] has_fill_value = bool(m.groupdict()["fill_value"]) elif dtype == "Sparse": diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c9d874fc08dbe..51fdfe89496df 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -460,8 +460,7 @@ def __init__( # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level - ilevel = self._ilevel - if ilevel is not None: + if (ilevel := self._ilevel) is not None: # In extant tests, the new self.grouping_vector matches # `index.get_level_values(ilevel)` whenever # mapper is None and isinstance(index, MultiIndex) @@ -547,8 +546,7 @@ def _passed_categorical(self) -> bool: @cache_readonly def name(self) -> Hashable: - ilevel = self._ilevel - if ilevel is not None: + if (ilevel := self._ilevel) is not None: return self._index.names[ilevel] if isinstance(self._orig_grouper, (Index, Series)): diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c4c7f73ee166c..42e79d187124c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -695,9 +695,8 @@ def size(self) -> Series: Compute group sizes.
""" ids = self.ids - ngroups = self.ngroups out: np.ndarray | list - if ngroups: + if (ngroups := self.ngroups): out = np.bincount(ids[ids != -1], minlength=ngroups) else: out = [] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c17c8c1d9a172..175d1f6d2f995 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6108,9 +6108,8 @@ def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: # Count missing values missing_mask = indexer < 0 - nmissing = missing_mask.sum() - if nmissing: + if (nmissing := missing_mask.sum()): if nmissing == len(indexer): raise KeyError(f"None of [{key}] are in the [{axis_name}]") diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index b990eca39b3dd..0bad350d86873 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -144,8 +144,7 @@ def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True) -> pd.DataFrame: else: pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False) - index_obj = df.metadata.get("pandas.index", None) - if index_obj is not None: + if (index_obj := df.metadata.get("pandas.index", None)) is not None: pandas_df.index = index_obj return pandas_df @@ -372,8 +371,7 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray: """Parse datetime `format_str` to interpret the `data`.""" # timestamp 'ts{unit}:tz' - timestamp_meta = re.match(r"ts([smun]):(.*)", format_str) - if timestamp_meta: + if (timestamp_meta := re.match(r"ts([smun]):(.*)", format_str)): unit, tz = timestamp_meta.group(1), timestamp_meta.group(2) if unit != "s": # the format string describes only a first letter of the unit, so @@ -386,8 +384,7 @@ def parse_datetime_format_str(format_str, data) -> pd.Series | np.ndarray: return data # date 'td{Days/Ms}' - date_meta = re.match(r"td([Dm])", 
format_str) - if date_meta: + if (date_meta := re.match(r"td([Dm])", format_str)): unit = date_meta.group(1) if unit == "D": # NumPy doesn't support DAY unit, so converting days to seconds diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 0adb7b51cf2b7..daf601766764c 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -530,8 +530,7 @@ def g(x): def f(x): if not isinstance(x, str): return empty_row - m = regex.search(x) - if m: + if (m := regex.search(x)): return [na_value if item is None else item for item in m.groups()] else: return empty_row diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 69fce8cf2137e..a589187882788 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -741,8 +741,7 @@ def _apply( codes = self._grouper.codes levels = copy.copy(self._grouper.levels) - group_indices = self._grouper.indices.values() - if group_indices: + if (group_indices := self._grouper.indices.values()): indexer = np.concatenate(list(group_indices)) else: indexer = np.array([], dtype=np.intp)