Skip to content

API / CoW: detect and raise error for chained assignment under Copy-on-Write #49467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jan 24, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Exceptions and warnings
errors.AccessorRegistrationWarning
errors.AttributeConflictWarning
errors.CategoricalConversionWarning
errors.ChainedAssignmentError
errors.ClosedFileError
errors.CSSWarning
errors.DatabaseError
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import functools
from io import StringIO
import itertools
import sys
from textwrap import dedent
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -95,7 +96,11 @@
function as nv,
np_percentile_argname,
)
from pandas.errors import InvalidIndexError
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Substitution,
Expand Down Expand Up @@ -3838,6 +3843,10 @@ def isetitem(self, loc, value) -> None:
self._iset_item_mgr(loc, arraylike, inplace=False)

def __setitem__(self, key, value):
if using_copy_on_write():
if sys.getrefcount(self) <= 3:
raise ChainedAssignmentError(_chained_assignment_msg)

key = com.apply_if_callable(key, self)

# see if we can slice the rows
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from contextlib import suppress
import sys
from typing import (
TYPE_CHECKING,
Hashable,
Expand All @@ -12,6 +13,8 @@

import numpy as np

from pandas._config import using_copy_on_write

from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas._typing import (
Expand All @@ -20,9 +23,11 @@
)
from pandas.errors import (
AbstractMethodError,
ChainedAssignmentError,
IndexingError,
InvalidIndexError,
LossySetitemError,
_chained_assignment_msg,
)
from pandas.util._decorators import doc

Expand Down Expand Up @@ -830,6 +835,11 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:

@final
def __setitem__(self, key, value) -> None:
if using_copy_on_write():
# print("_LocationIndexer.__setitem__ refcount: ",sys.getrefcount(self.obj))
if sys.getrefcount(self.obj) <= 2:
raise ChainedAssignmentError(_chained_assignment_msg)

check_dict_or_set_indexers(key)
if isinstance(key, tuple):
key = tuple(list(x) if is_iterator(x) else x for x in key)
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
from __future__ import annotations

import sys
from textwrap import dedent
from typing import (
IO,
Expand Down Expand Up @@ -68,7 +69,11 @@
npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import InvalidIndexError
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Substitution,
Expand Down Expand Up @@ -1070,6 +1075,11 @@ def _get_value(self, label, takeable: bool = False):
return self.iloc[loc]

def __setitem__(self, key, value) -> None:
if using_copy_on_write():
# print("Series.__getitem__ refcount: ", sys.getrefcount(self))
if sys.getrefcount(self) <= 3:
raise ChainedAssignmentError(_chained_assignment_msg)

check_dict_or_set_indexers(key)
key = com.apply_if_callable(key, self)
cacher_needs_updating = self._check_is_chained_assignment_possible()
Expand Down
36 changes: 36 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,42 @@ class SettingWithCopyWarning(Warning):
"""


class ChainedAssignmentError(ValueError):
"""
Exception raised when trying to set using chained assignment.

When the ``mode.copy_on_write`` option is enabled, chained assignment can
never work. In such a situation, we are always setting into a temporary
object that is the result of indexing (getitem), which under Copy-on-Write
always behaves as a copy. Thus, assigning through a chain can never
update the original Series or DataFrame.

For more information on view vs. copy,
see :ref:`the user guide<indexing.view_versus_copy>`.

Examples
--------
>>> pd.options.mode.copy_on_write = True
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
>>> df["A"][0:3] = 10 # doctest: +SKIP
... # ChainedAssignmentError: ...
"""


_chained_assignment_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment.\n"
"When using the Copy-on-Write mode, such chained assignment never works "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

never updates the original....

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's on the next line?

Copy link
Member

@phofl phofl Jan 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry, should have been clearer:

I would reword to:

When using the Copy-on-Write mode, such chained assignment never updates the original DataFrame or Series, ...

This sounds better to me

"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
"the assignment in a single step.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy"
)


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError
import pandas.util._test_decorators as td

from pandas.core.dtypes.base import _registry as ea_registry
Expand Down Expand Up @@ -1245,12 +1246,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
values = df._mgr.blocks[0].values

for label in df.columns:
df[label][label] = 1

if not using_copy_on_write:
for label in df.columns:
df[label][label] = 1

# diagonal values all updated
assert np.all(values[np.arange(10), np.arange(10)] == 1)
else:
with pytest.raises(ChainedAssignmentError):
for label in df.columns:
df[label][label] = 1
# original dataframe not updated
assert np.all(values[np.arange(10), np.arange(10)] == 0)
8 changes: 6 additions & 2 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)

from pandas import (
DataFrame,
Expand Down Expand Up @@ -124,7 +127,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write):
df_orig = dm.copy()

if using_copy_on_write:
dm.xs(2)[:] = 20
with pytest.raises(ChainedAssignmentError):
dm.xs(2)[:] = 20
tm.assert_frame_equal(dm, df_orig)
elif using_array_manager:
# INFO(ArrayManager) with ArrayManager getting a row as a view is
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning
from pandas.errors import (
ChainedAssignmentError,
PerformanceWarning,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -340,7 +343,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write):
)
repr(Y)
Y["e"] = Y["e"].astype("object")
Y["g"]["c"] = np.NaN
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
Y["g"]["c"] = np.NaN
else:
Y["g"]["c"] = np.NaN
repr(Y)
result = Y.sum() # noqa
exp = Y["g"].sum() # noqa
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -50,11 +53,13 @@ def test_cache_updating(using_copy_on_write):

# setting via chained assignment
# but actually works, since everything is a view
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
if using_copy_on_write:
assert result == df_original.loc[0, "z"]
with pytest.raises(ChainedAssignmentError):
df.loc[0]["z"].iloc[0] = 1.0
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
else:
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
assert result == 1

# correct setting
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/indexing/multiindex/test_partial.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -132,20 +133,26 @@ def test_partial_set(
exp.iloc[65:85] = 0
tm.assert_frame_equal(df, exp)

df["A"].loc[2000, 4] = 1
if not using_copy_on_write:
exp["A"].loc[2000, 4].values[:] = 1
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
else:
df["A"].loc[2000, 4] = 1
exp.iloc[65:85, 0] = 1
tm.assert_frame_equal(df, exp)

df.loc[2000] = 5
exp.iloc[:100] = 5
tm.assert_frame_equal(df, exp)

# this works...for now
df["A"].iloc[14] = 5
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
df["A"].iloc[14] = 5
df["A"].iloc[14] == exp["A"].iloc[14]
else:
df["A"].iloc[14] = 5
assert df["A"].iloc[14] == 5

@pytest.mark.parametrize("dtype", [int, float])
Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -501,8 +504,8 @@ def test_frame_setitem_copy_raises(
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
if using_copy_on_write:
# TODO(CoW) it would be nice if this could still warn/raise
df["foo"]["one"] = 2
with pytest.raises(ChainedAssignmentError):
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
Expand All @@ -516,7 +519,8 @@ def test_frame_setitem_copy_no_write(
expected = frame
df = frame.copy()
if using_copy_on_write:
df["foo"]["one"] = 2
with pytest.raises(ChainedAssignmentError):
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
Expand Down
Loading