Backport PR #52068 on branch 2.0.x (CoW: Switch to copy=False everywhere for Series constructor) (#52367)

meeseeksmachine · phofl · web-flow · commit 5d69654a4ad3 · 2023-04-02T19:49:26.000-07:00
Backport PR #52068: CoW: Switch to copy=False everywhere for Series constructor Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -837,7 +837,7 @@ def value_counts(
     if bins is not None:
         from pandas.core.reshape.tile import cut
 
-        values = Series(values)
+        values = Series(values, copy=False)
         try:
             ii = cut(values, bins, include_lowest=True)
         except TypeError as err:
@@ -860,7 +860,7 @@ def value_counts(
     else:
         if is_extension_array_dtype(values):
             # handle Categorical and sparse,
-            result = Series(values)._values.value_counts(dropna=dropna)
+            result = Series(values, copy=False)._values.value_counts(dropna=dropna)
             result.name = name
             result.index.name = index_name
             counts = result._values
@@ -892,7 +892,7 @@ def value_counts(
                 idx = idx.astype(object)
             idx.name = index_name
 
-            result = Series(counts, index=idx, name=name)
+            result = Series(counts, index=idx, name=name, copy=False)
 
     if sort:
         result = result.sort_values(ascending=ascending)
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -448,7 +448,7 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         index_arr = self._from_backing_data(np.asarray(result.index._data))
         index = Index(index_arr, name=result.index.name)
-        return Series(result._values, index=index, name=result.name)
+        return Series(result._values, index=index, name=result.name, copy=False)
 
     def _quantile(
         self: NDArrayBackedExtensionArrayT,
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -1104,7 +1104,7 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         index = Index(type(self)(values))
 
-        return Series(counts, index=index, name="count")
+        return Series(counts, index=index, name="count", copy=False)
 
     @classmethod
     def _concat_same_type(
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1500,7 +1500,9 @@ def value_counts(self, dropna: bool = True) -> Series:
         ix = coerce_indexer_dtype(ix, self.dtype.categories)
         ix = self._from_backing_data(ix)
 
-        return Series(count, index=CategoricalIndex(ix), dtype="int64", name="count")
+        return Series(
+            count, index=CategoricalIndex(ix), dtype="int64", name="count", copy=False
+        )
 
     # error: Argument 2 of "_empty" is incompatible with supertype
     # "NDArrayBackedExtensionArray"; supertype defines the argument type as
@@ -1758,7 +1760,9 @@ def _values_for_rank(self):
             #  reorder the categories (so rank can use the float codes)
             #  instead of passing an object array to rank
             values = np.array(
-                self.rename_categories(Series(self.categories).rank().values)
+                self.rename_categories(
+                    Series(self.categories, copy=False).rank().values
+                )
             )
         return values
 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -1002,7 +1002,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         )
 
         if dropna:
-            res = Series(value_counts, index=keys, name="count")
+            res = Series(value_counts, index=keys, name="count", copy=False)
             res.index = res.index.astype(self.dtype)
             res = res.astype("Int64")
             return res
@@ -1018,7 +1018,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         mask = np.zeros(len(counts), dtype="bool")
         counts_array = IntegerArray(counts, mask)
 
-        return Series(counts_array, index=index, name="count")
+        return Series(counts_array, index=index, name="count", copy=False)
 
     @doc(ExtensionArray.equals)
     def equals(self, other) -> bool:
diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -219,6 +219,7 @@ def to_dense(self) -> Series:
             self._parent.array.to_dense(),
             index=self._parent.index,
             name=self._parent.name,
+            copy=False,
         )
 
 
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -890,7 +890,7 @@ def value_counts(self, dropna: bool = True) -> Series:
             index = Index(keys)
         else:
             index = keys
-        return Series(counts, index=index)
+        return Series(counts, index=index, copy=False)
 
     # --------
     # Indexing
diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py
@@ -195,7 +195,7 @@ def coo_to_sparse_series(
     from pandas import SparseDtype
 
     try:
-        ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
+        ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)), copy=False)
     except AttributeError as err:
         raise TypeError(
             f"Expected coo_matrix. Got {type(A).__name__} instead."
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -691,7 +691,7 @@ def value_counts(
             llab = lambda lab, inc: lab[inc]
         else:
             # lab is a Categorical with categories an IntervalIndex
-            cat_ser = cut(Series(val), bins, include_lowest=True)
+            cat_ser = cut(Series(val, copy=False), bins, include_lowest=True)
             cat_obj = cast("Categorical", cat_ser._values)
             lev = cat_obj.categories
             lab = lev.take(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1439,7 +1439,7 @@ def _agg_py_fallback(
 
         if values.ndim == 1:
             # For DataFrameGroupBy we only get here with ExtensionArray
-            ser = Series(values)
+            ser = Series(values, copy=False)
         else:
             # We only get here with values.dtype == object
             # TODO: special case not needed with ArrayManager
diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py
@@ -101,7 +101,7 @@ def _delegate_property_get(self, name):
             index = self.orig.index
         else:
             index = self._parent.index
-        # return the result as a Series, which is by definition a copy
+        # return the result as a Series
         result = Series(result, index=index, name=self.name).__finalize__(self._parent)
 
         # setting this object will show a SettingWithCopyWarning/Error
diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
@@ -236,7 +236,7 @@ def _get_dummies_1d(
     from pandas.core.reshape.concat import concat
 
     # Series avoids inconsistent NaN handling
-    codes, levels = factorize_from_iterable(Series(data))
+    codes, levels = factorize_from_iterable(Series(data, copy=False))
 
     if dtype is None:
         dtype = np.dtype(bool)
@@ -310,7 +310,7 @@ def get_empty_frame(data) -> DataFrame:
                 fill_value=fill_value,
                 dtype=dtype,
             )
-            sparse_series.append(Series(data=sarr, index=index, name=col))
+            sparse_series.append(Series(data=sarr, index=index, name=col, copy=False))
 
         return concat(sparse_series, axis=1, copy=False)
 
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -379,7 +379,7 @@ def _get_series_list(self, others):
         if isinstance(others, ABCSeries):
             return [others]
         elif isinstance(others, ABCIndex):
-            return [Series(others._values, index=idx, dtype=others.dtype)]
+            return [Series(others, index=idx, dtype=others.dtype)]
         elif isinstance(others, ABCDataFrame):
             return [others[x] for x in others]
         elif isinstance(others, np.ndarray) and others.ndim == 2:
@@ -634,7 +634,7 @@ def cat(
             else:
                 dtype = self._orig.dtype
             res_ser = Series(
-                result, dtype=dtype, index=data.index, name=self._orig.name
+                result, dtype=dtype, index=data.index, name=self._orig.name, copy=False
             )
             out = res_ser.__finalize__(self._orig, method="str_cat")
         return out
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
@@ -376,7 +376,7 @@ def _str_get_dummies(self, sep: str = "|"):
             arr = sep + arr.astype(str) + sep
 
         tags: set[str] = set()
-        for ts in Series(arr).str.split(sep):
+        for ts in Series(arr, copy=False).str.split(sep):
             tags.update(ts)
         tags2 = sorted(tags - {""})
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -250,7 +250,7 @@ def _maybe_cache(
             cache_dates = convert_listlike(unique_dates, format)
             # GH#45319
             try:
-                cache_array = Series(cache_dates, index=unique_dates)
+                cache_array = Series(cache_dates, index=unique_dates, copy=False)
             except OutOfBoundsDatetime:
                 return cache_array
             # GH#39882 and GH#35888 in case of None and NaT we get duplicates
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
@@ -730,7 +730,7 @@ def cov_func(x, y):
                 self.ignore_na,
                 bias,
             )
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)
 
         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
@@ -807,7 +807,7 @@ def _cov(X, Y):
                 x_var = _cov(x_array, x_array)
                 y_var = _cov(y_array, y_array)
                 result = cov / zsqrt(x_var * y_var)
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)
 
         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -383,7 +383,7 @@ def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
 
         if self.on is not None and not self._on.equals(obj.index):
             name = self._on.name
-            extra_col = Series(self._on, index=self.obj.index, name=name)
+            extra_col = Series(self._on, index=self.obj.index, name=name, copy=False)
             if name in result.columns:
                 # TODO: sure we want to overwrite results?
                 result[name] = extra_col
@@ -1413,7 +1413,7 @@ def _generate_cython_apply_func(
         def apply_func(values, begin, end, min_periods, raw=raw):
             if not raw:
                 # GH 45912
-                values = Series(values, index=self._on)
+                values = Series(values, index=self._on, copy=False)
             return window_func(values, begin, end, min_periods)
 
         return apply_func
@@ -1670,7 +1670,7 @@ def cov_func(x, y):
                     notna(x_array + y_array).astype(np.float64), start, end, 0
                 )
                 result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)
 
         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
@@ -1727,7 +1727,7 @@ def corr_func(x, y):
                 )
                 denominator = (x_var * y_var) ** 0.5
                 result = numerator / denominator
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)
 
         return self._apply_pairwise(
             self._selected_obj, other, pairwise, corr_func, numeric_only
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -1988,7 +1988,7 @@ def _do_convert_categoricals(
                     # TODO: if we get a non-copying rename_categories, use that
                     cat_data = cat_data.rename_categories(categories)
                 except ValueError as err:
-                    vc = Series(categories).value_counts()
+                    vc = Series(categories, copy=False).value_counts()
                     repeated_cats = list(vc.index[vc > 1])
                     repeats = "-" * 80 + "\n" + "\n".join(repeated_cats)
                     # GH 25772
@@ -2005,7 +2005,7 @@ def _do_convert_categoricals(
 """
                     raise ValueError(msg) from err
                 # TODO: is the next line needed above in the data(...) method?
-                cat_series = Series(cat_data, index=data.index)
+                cat_series = Series(cat_data, index=data.index, copy=False)
                 cat_converted_data.append((col, cat_series))
             else:
                 cat_converted_data.append((col, data[col]))
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
@@ -288,7 +288,7 @@ def _grouped_plot_by_column(
         ax_values.append(re_plotf)
         ax.grid(grid)
 
-    result = pd.Series(ax_values, index=columns)
+    result = pd.Series(ax_values, index=columns, copy=False)
 
     # Return axes in multiplot case, maybe revisit later # 985
     if return_type is None:

Original file line number	Diff line number	Diff line change
`@@ -219,6 +219,7 @@ def to_dense(self) -> Series:`
`219`	`219`	`self._parent.array.to_dense(),`
`220`	`220`	`index=self._parent.index,`
`221`	`221`	`name=self._parent.name,`
	`222`	`+ copy=False,`
`222`	`223`	`)`
`223`	`224`
`224`	`225`