Skip to content

Commit 3b57972

Browse files
authored
CoW: Enable CoW by default and remove warning build (#56633)
1 parent c3014ab commit 3b57972

File tree

13 files changed

+26
-123
lines changed

13 files changed

+26
-123
lines changed

Diff for: .github/workflows/unit-tests.yml

-12
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,6 @@ jobs:
7373
env_file: actions-312.yaml
7474
pattern: "not slow and not network and not single_cpu"
7575
pandas_copy_on_write: "1"
76-
- name: "Copy-on-Write 3.11 (warnings)"
77-
env_file: actions-311.yaml
78-
pattern: "not slow and not network and not single_cpu"
79-
pandas_copy_on_write: "warn"
80-
- name: "Copy-on-Write 3.10 (warnings)"
81-
env_file: actions-310.yaml
82-
pattern: "not slow and not network and not single_cpu"
83-
pandas_copy_on_write: "warn"
84-
- name: "Copy-on-Write 3.9 (warnings)"
85-
env_file: actions-39.yaml
86-
pattern: "not slow and not network and not single_cpu"
87-
pandas_copy_on_write: "warn"
8876
- name: "Pypy"
8977
env_file: actions-pypy-39.yaml
9078
pattern: "not slow and not network and not single_cpu"

Diff for: asv_bench/benchmarks/algos/isin.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ def setup(self, dtype):
5959
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
6060
try:
6161
self.series = Series(
62-
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
62+
Index([f"i-{i}" for i in range(N)], dtype=object)._values,
63+
dtype=dtype,
6364
)
6465
except ImportError as err:
6566
raise NotImplementedError from err

Diff for: asv_bench/benchmarks/strings.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ class Dtypes:
1919
def setup(self, dtype):
2020
try:
2121
self.s = Series(
22-
Index([f"i-{i}" for i in range(10000)], dtype=object), dtype=dtype
22+
Index([f"i-{i}" for i in range(10000)], dtype=object)._values,
23+
dtype=dtype,
2324
)
2425
except ImportError as err:
2526
raise NotImplementedError from err

Diff for: pandas/_config/__init__.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,11 @@
3232

3333

3434
def using_copy_on_write() -> bool:
35-
_mode_options = _global_config["mode"]
36-
return _mode_options["copy_on_write"] is True
35+
return True
3736

3837

3938
def warn_copy_on_write() -> bool:
40-
_mode_options = _global_config["mode"]
41-
return _mode_options["copy_on_write"] == "warn"
39+
return False
4240

4341

4442
def using_nullable_dtypes() -> bool:

Diff for: pandas/conftest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1963,15 +1963,15 @@ def using_copy_on_write() -> bool:
19631963
"""
19641964
Fixture to check if Copy-on-Write is enabled.
19651965
"""
1966-
return pd.options.mode.copy_on_write is True
1966+
return True
19671967

19681968

19691969
@pytest.fixture
19701970
def warn_copy_on_write() -> bool:
19711971
"""
19721972
Fixture to check if Copy-on-Write is in warning mode.
19731973
"""
1974-
return pd.options.mode.copy_on_write == "warn"
1974+
return False
19751975

19761976

19771977
@pytest.fixture

Diff for: pandas/core/generic.py

+11-26
Original file line numberDiff line numberDiff line change
@@ -6704,8 +6704,7 @@ def copy(self, deep: bool_t | None = True) -> Self:
67046704
:ref:`gotchas <gotchas.thread-safety>` when copying in a threading
67056705
environment.
67066706
6707-
When ``copy_on_write`` in pandas config is set to ``True``, the
6708-
``copy_on_write`` config takes effect even when ``deep=False``.
6707+
Copy-on-Write protects shallow copies against accidental modifications.
67096708
This means that any changes to the copied data would make a new copy
67106709
of the data upon write (and vice versa). Changes made to either the
67116710
original or copied variable would not be reflected in the counterpart.
@@ -6731,12 +6730,15 @@ def copy(self, deep: bool_t | None = True) -> Self:
67316730
>>> deep = s.copy()
67326731
>>> shallow = s.copy(deep=False)
67336732
6734-
Shallow copy shares data and index with original.
6733+
Shallow copy shares index with original; the data is a
6734+
view of the original.
67356735
67366736
>>> s is shallow
67376737
False
6738-
>>> s.values is shallow.values and s.index is shallow.index
6739-
True
6738+
>>> s.values is shallow.values
6739+
False
6740+
>>> s.index is shallow.index
6741+
False
67406742
67416743
Deep copy has own copy of data and index.
67426744
@@ -6745,18 +6747,17 @@ def copy(self, deep: bool_t | None = True) -> Self:
67456747
>>> s.values is deep.values or s.index is deep.index
67466748
False
67476749
6748-
Updates to the data shared by shallow copy and original is reflected
6749-
in both (NOTE: this will no longer be true for pandas >= 3.0);
6750-
deep copy remains unchanged.
6750+
The shallow copy is protected against updating the original object
6751+
as well. Thus, updates will only be reflected in one of the two objects.
67516752
67526753
>>> s.iloc[0] = 3
67536754
>>> shallow.iloc[1] = 4
67546755
>>> s
67556756
a 3
6756-
b 4
6757+
b 2
67576758
dtype: int64
67586759
>>> shallow
6759-
a 3
6760+
a 1
67606761
b 4
67616762
dtype: int64
67626763
>>> deep
@@ -6779,22 +6780,6 @@ def copy(self, deep: bool_t | None = True) -> Self:
67796780
0 [10, 2]
67806781
1 [3, 4]
67816782
dtype: object
6782-
6783-
**Copy-on-Write is set to true**, the shallow copy is not modified
6784-
when the original data is changed:
6785-
6786-
>>> with pd.option_context("mode.copy_on_write", True):
6787-
... s = pd.Series([1, 2], index=["a", "b"])
6788-
... copy = s.copy(deep=False)
6789-
... s.iloc[0] = 100
6790-
... s
6791-
a 100
6792-
b 2
6793-
dtype: int64
6794-
>>> copy
6795-
a 1
6796-
b 2
6797-
dtype: int64
67986783
"""
67996784
data = self._mgr.copy(deep=deep)
68006785
self._clear_item_cache()

Diff for: pandas/core/indexes/multi.py

+2
Original file line numberDiff line numberDiff line change
@@ -3492,6 +3492,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
34923492
"cannot index with a boolean indexer that "
34933493
"is not the same length as the index"
34943494
)
3495+
if isinstance(k, (ABCSeries, Index)):
3496+
k = k._values
34953497
lvl_indexer = np.asarray(k)
34963498
if indexer is None:
34973499
lvl_indexer = lvl_indexer.copy()

Diff for: pandas/tests/copy_view/test_internals.py

-30
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
import pandas as pd
54
from pandas import DataFrame
65
import pandas._testing as tm
76
from pandas.tests.copy_view.util import get_array
@@ -42,35 +41,6 @@ def test_consolidate(using_copy_on_write):
4241
assert df.loc[0, "b"] == 0.1
4342

4443

45-
@pytest.mark.single_cpu
46-
def test_switch_options():
47-
# ensure we can switch the value of the option within one session
48-
# (assuming data is constructed after switching)
49-
50-
# using the option_context to ensure we set back to global option value
51-
# after running the test
52-
with pd.option_context("mode.copy_on_write", False):
53-
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
54-
subset = df[:]
55-
subset.iloc[0, 0] = 0
56-
# df updated with CoW disabled
57-
assert df.iloc[0, 0] == 0
58-
59-
pd.options.mode.copy_on_write = True
60-
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
61-
subset = df[:]
62-
subset.iloc[0, 0] = 0
63-
# df not updated with CoW enabled
64-
assert df.iloc[0, 0] == 1
65-
66-
pd.options.mode.copy_on_write = False
67-
df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
68-
subset = df[:]
69-
subset.iloc[0, 0] = 0
70-
# df updated with CoW disabled
71-
assert df.iloc[0, 0] == 0
72-
73-
7444
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
7545
@pytest.mark.parametrize(
7646
"locs, arr",

Diff for: pandas/tests/extension/conftest.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22

33
import pytest
44

5-
from pandas import (
6-
Series,
7-
options,
8-
)
5+
from pandas import Series
96

107

118
@pytest.fixture
@@ -222,4 +219,4 @@ def using_copy_on_write() -> bool:
222219
"""
223220
Fixture to check if Copy-on-Write is enabled.
224221
"""
225-
return options.mode.copy_on_write is True
222+
return True

Diff for: pandas/tests/frame/methods/test_copy.py

-22
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
import pandas.util._test_decorators as td
5-
64
from pandas import DataFrame
7-
import pandas._testing as tm
85

96

107
class TestCopy:
@@ -18,25 +15,6 @@ def test_copy_index_name_checking(self, float_frame, attr):
1815
getattr(cp, attr).name = "foo"
1916
assert getattr(float_frame, attr).name is None
2017

21-
@td.skip_copy_on_write_invalid_test
22-
def test_copy_cache(self):
23-
# GH#31784 _item_cache not cleared on copy causes incorrect reads after updates
24-
df = DataFrame({"a": [1]})
25-
26-
df["x"] = [0]
27-
df["a"]
28-
29-
df.copy()
30-
31-
df["a"].values[0] = -1
32-
33-
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]}))
34-
35-
df["y"] = [0]
36-
37-
assert df["a"].values[0] == -1
38-
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]}))
39-
4018
def test_copy(self, float_frame, float_string_frame):
4119
cop = float_frame.copy()
4220
cop["E"] = cop["A"]

Diff for: pandas/tests/series/test_ufunc.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
import pandas.util._test_decorators as td
9-
108
import pandas as pd
119
import pandas._testing as tm
1210
from pandas.arrays import SparseArray
@@ -456,8 +454,7 @@ def add3(x, y, z):
456454
ufunc(ser, ser, df)
457455

458456

459-
# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082
460-
@td.skip_copy_on_write_not_yet_implemented
457+
@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
461458
def test_np_fix():
462459
# np.fix is not a ufunc but is composed of several ufunc calls under the hood
463460
# with `out` and `where` keywords

Diff for: pandas/tests/test_downstream.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ def test_dask_ufunc():
5858
s = Series([1.5, 2.3, 3.7, 4.0])
5959
ds = dd.from_pandas(s, npartitions=2)
6060

61-
result = da.fix(ds).compute()
62-
expected = np.fix(s)
61+
result = da.log(ds).compute()
62+
expected = np.log(s)
6363
tm.assert_series_equal(result, expected)
6464
finally:
6565
pd.set_option("compute.use_numexpr", olduse)

Diff for: pandas/util/_test_decorators.py

-14
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,9 @@ def test_foo():
3333

3434
import pytest
3535

36-
from pandas._config import get_option
37-
3836
if TYPE_CHECKING:
3937
from pandas._typing import F
4038

41-
4239
from pandas.compat import (
4340
IS64,
4441
is_platform_windows,
@@ -144,14 +141,3 @@ def documented_fixture(fixture):
144141
return fixture
145142

146143
return documented_fixture
147-
148-
149-
skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
150-
get_option("mode.copy_on_write") is True,
151-
reason="Not yet implemented/adapted for Copy-on-Write mode",
152-
)
153-
154-
skip_copy_on_write_invalid_test = pytest.mark.skipif(
155-
get_option("mode.copy_on_write") is True,
156-
reason="Test not valid for Copy-on-Write mode",
157-
)

0 commit comments

Comments
 (0)