Skip to content

Commit bfe51f8

Browse files
committed
COMPAT: remove SettingWithCopy warning, and use copy-on-write where applicable, pandas-dev#10954
1 parent 7ca2190 commit bfe51f8

14 files changed

+193
-232
lines changed

Diff for: pandas/core/common.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,12 @@
2626
class PandasError(Exception):
2727
pass
2828

29-
30-
class SettingWithCopyError(ValueError):
29+
class SettingImmutableError(ValueError):
3130
pass
3231

33-
34-
class SettingWithCopyWarning(Warning):
32+
class SettingWithCopyError(ValueError):
3533
pass
3634

37-
3835
class AmbiguousIndexError(PandasError, KeyError):
3936
pass
4037

Diff for: pandas/core/config_init.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -321,13 +321,12 @@ def use_inf_as_null_cb(key):
321321
# user warnings
322322
chained_assignment = """
323323
: string
324-
Raise an exception, warn, or no action if trying to use chained assignment,
325-
The default is warn
324+
this option has been deprecated and has no effect
326325
"""
327326

328-
with cf.config_prefix('mode'):
329-
cf.register_option('chained_assignment', 'warn', chained_assignment,
330-
validator=is_one_of_factory([None, 'warn', 'raise']))
327+
cf.register_option('mode.chained_assignment', 'warn', chained_assignment,
328+
validator=is_one_of_factory([None, 'warn', 'raise']))
329+
cf.deprecate_option('mode.chained_assignment', chained_assignment)
331330

332331

333332
# Set up the io.excel specific configuration.

Diff for: pandas/core/frame.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -2229,7 +2229,7 @@ def __setitem__(self, key, value):
22292229
self._set_item(key, value)
22302230

22312231
def _setitem_slice(self, key, value):
2232-
self._check_setitem_copy()
2232+
self._check_copy_on_write()
22332233
self.ix._setitem_with_indexer(key, value)
22342234

22352235
def _setitem_array(self, key, value):
@@ -2240,7 +2240,7 @@ def _setitem_array(self, key, value):
22402240
(len(key), len(self.index)))
22412241
key = check_bool_indexer(self.index, key)
22422242
indexer = key.nonzero()[0]
2243-
self._check_setitem_copy()
2243+
self._check_copy_on_write()
22442244
self.ix._setitem_with_indexer(indexer, value)
22452245
else:
22462246
if isinstance(value, DataFrame):
@@ -2250,7 +2250,7 @@ def _setitem_array(self, key, value):
22502250
self[k1] = value[k2]
22512251
else:
22522252
indexer = self.ix._convert_to_indexer(key, axis=1)
2253-
self._check_setitem_copy()
2253+
self._check_copy_on_write()
22542254
self.ix._setitem_with_indexer((slice(None), indexer), value)
22552255

22562256
def _setitem_frame(self, key, value):
@@ -2260,7 +2260,7 @@ def _setitem_frame(self, key, value):
22602260
raise TypeError('Must pass DataFrame with boolean values only')
22612261

22622262
self._check_inplace_setting(value)
2263-
self._check_setitem_copy()
2263+
self._check_copy_on_write()
22642264
self.where(-key, value, inplace=True)
22652265

22662266
def _ensure_valid_index(self, value):
@@ -2296,12 +2296,6 @@ def _set_item(self, key, value):
22962296
value = self._sanitize_column(key, value)
22972297
NDFrame._set_item(self, key, value)
22982298

2299-
# check if we are modifying a copy
2300-
# try to set first as we want an invalid
2301-
# value exception to occur first
2302-
if len(self):
2303-
self._check_setitem_copy()
2304-
23052299
def insert(self, loc, column, value, allow_duplicates=False):
23062300
"""
23072301
Insert column into DataFrame at specified location.

Diff for: pandas/core/generic.py

+62-94
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import operator
44
import weakref
55
import gc
6+
import inspect
67
import numpy as np
78
import pandas.lib as lib
89

@@ -21,8 +22,7 @@
2122
from pandas.core.common import (isnull, notnull, is_list_like,
2223
_values_from_object, _maybe_promote,
2324
_maybe_box_datetimelike, ABCSeries,
24-
SettingWithCopyError, SettingWithCopyWarning,
25-
AbstractMethodError)
25+
AbstractMethodError, SettingWithCopyError)
2626
import pandas.core.nanops as nanops
2727
from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
2828
from pandas.core import config
@@ -79,12 +79,13 @@ class NDFrame(PandasObject):
7979
copy : boolean, default False
8080
"""
8181
_internal_names = ['_data', '_cacher', '_item_cache', '_cache',
82-
'is_copy', '_subtyp', '_index',
82+
'is_copy', '_subtyp', '_index', '_allow_copy_on_write',
8383
'_default_kind', '_default_fill_value', '_metadata',
8484
'__array_struct__', '__array_interface__']
8585
_internal_names_set = set(_internal_names)
8686
_accessors = frozenset([])
8787
_metadata = []
88+
_allow_copy_on_write = True
8889
is_copy = None
8990

9091
def __init__(self, data, axes=None, copy=False, dtype=None,
@@ -1173,7 +1174,7 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
11731174
pass
11741175

11751176
if verify_is_copy:
1176-
self._check_setitem_copy(stacklevel=5, t='referant')
1177+
self._check_copy_on_write()
11771178

11781179
if clear:
11791180
self._clear_item_cache()
@@ -1202,6 +1203,8 @@ def _slice(self, slobj, axis=0, kind=None):
12021203
return result
12031204

12041205
def _set_item(self, key, value):
1206+
1207+
self._check_copy_on_write()
12051208
self._data.set(key, value)
12061209
self._clear_item_cache()
12071210

@@ -1214,10 +1217,54 @@ def _set_is_copy(self, ref=None, copy=True):
12141217
else:
12151218
self.is_copy = None
12161219

1220+
def _check_copy_on_write(self):
1221+
1222+
# we could have a copy-on-write scenario
1223+
if self.is_copy and self._allow_copy_on_write:
1224+
1225+
# we have an exception
1226+
if isinstance(self.is_copy, Exception):
1227+
raise self.is_copy
1228+
1229+
def get_names_for_obj(__really_unused_name__342424__):
1230+
"""Returns all named references for self"""
1231+
1232+
removals = set(["__really_unused_name__342424__", "__really_unused_name__xxxxx__", "self"])
1233+
refs = gc.get_referrers(__really_unused_name__342424__)
1234+
1235+
names = []
1236+
for ref in refs:
1237+
if inspect.isframe(ref):
1238+
for name, __really_unused_name__xxxxx__ in ref.f_locals.iteritems():
1239+
if __really_unused_name__xxxxx__ is __really_unused_name__342424__:
1240+
names.append(name)
1241+
1242+
for name, __really_unused_name__xxxxx__ in globals().iteritems():
1243+
if __really_unused_name__xxxxx__ is __really_unused_name__342424__:
1244+
names.append(name)
1245+
1246+
return set(names) - removals
1247+
1248+
# collect garbage
1249+
# if we don't have references, then we have a reassignment case
1250+
# e.g. df = df.ix[....]; since the reference is gone
1251+
# we can just copy and be done
1252+
1253+
# otherwise we have chained indexing, raise and error
1254+
gc.collect(2)
1255+
if self.is_copy() is not None:
1256+
names = get_names_for_obj(self)
1257+
if not len(names):
1258+
raise SettingWithCopyError("chained indexing detected, you can fix this ......")
1259+
1260+
# provide copy-on-write
1261+
self._data = self._data.copy()
1262+
self.is_copy = None
1263+
12171264
def _check_is_chained_assignment_possible(self):
12181265
"""
12191266
check if we are a view, have a cacher, and are of mixed type
1220-
if so, then force a setitem_copy check
1267+
if so, then force a copy_on_write check
12211268
12221269
should be called just near setting a value
12231270
@@ -1227,91 +1274,12 @@ def _check_is_chained_assignment_possible(self):
12271274
if self._is_view and self._is_cached:
12281275
ref = self._get_cacher()
12291276
if ref is not None and ref._is_mixed_type:
1230-
self._check_setitem_copy(stacklevel=4, t='referant', force=True)
1277+
self._check_copy_on_write()
12311278
return True
12321279
elif self.is_copy:
1233-
self._check_setitem_copy(stacklevel=4, t='referant')
1280+
self._check_copy_on_write()
12341281
return False
12351282

1236-
def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
1237-
"""
1238-
1239-
Parameters
1240-
----------
1241-
stacklevel : integer, default 4
1242-
the level to show of the stack when the error is output
1243-
t : string, the type of setting error
1244-
force : boolean, default False
1245-
if True, then force showing an error
1246-
1247-
Validate whether we are doing a setitem on a chained copy.
1248-
1249-
If you call this function, be sure to set the stacklevel such that the
1250-
user will see the error *at the level of setting*
1251-
1252-
It is technically possible to figure out that we are setting on
1253-
a copy even WITH a multi-dtyped pandas object. In other words, some blocks
1254-
may be views while other are not. Currently _is_view will ALWAYS return False
1255-
for multi-blocks to avoid having to handle this case.
1256-
1257-
df = DataFrame(np.arange(0,9), columns=['count'])
1258-
df['group'] = 'b'
1259-
1260-
# this technically need not raise SettingWithCopy if both are view (which is not
1261-
# generally guaranteed but is usually True
1262-
# however, this is in general not a good practice and we recommend using .loc
1263-
df.iloc[0:5]['group'] = 'a'
1264-
1265-
"""
1266-
1267-
if force or self.is_copy:
1268-
1269-
value = config.get_option('mode.chained_assignment')
1270-
if value is None:
1271-
return
1272-
1273-
# see if the copy is not actually referred to; if so, then dissolve
1274-
# the copy weakref
1275-
try:
1276-
gc.collect(2)
1277-
if not gc.get_referents(self.is_copy()):
1278-
self.is_copy = None
1279-
return
1280-
except:
1281-
pass
1282-
1283-
# we might be a false positive
1284-
try:
1285-
if self.is_copy().shape == self.shape:
1286-
self.is_copy = None
1287-
return
1288-
except:
1289-
pass
1290-
1291-
# a custom message
1292-
if isinstance(self.is_copy, string_types):
1293-
t = self.is_copy
1294-
1295-
elif t == 'referant':
1296-
t = ("\n"
1297-
"A value is trying to be set on a copy of a slice from a "
1298-
"DataFrame\n\n"
1299-
"See the caveats in the documentation: "
1300-
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
1301-
1302-
else:
1303-
t = ("\n"
1304-
"A value is trying to be set on a copy of a slice from a "
1305-
"DataFrame.\n"
1306-
"Try using .loc[row_indexer,col_indexer] = value instead\n\n"
1307-
"See the caveats in the documentation: "
1308-
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
1309-
1310-
if value == 'raise':
1311-
raise SettingWithCopyError(t)
1312-
elif value == 'warn':
1313-
warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)
1314-
13151283
def __delitem__(self, key):
13161284
"""
13171285
Delete item
@@ -3376,11 +3344,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
33763344
For frequencies that evenly subdivide 1 day, the "origin" of the
33773345
aggregated intervals. For example, for '5min' frequency, base could
33783346
range from 0 through 4. Defaults to 0
3379-
3347+
33803348
33813349
Examples
33823350
--------
3383-
3351+
33843352
Start by creating a series with 9 one minute timestamps.
33853353
33863354
>>> index = pd.date_range('1/1/2000', periods=9, freq='T')
@@ -3409,11 +3377,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
34093377
Downsample the series into 3 minute bins as above, but label each
34103378
bin using the right edge instead of the left. Please note that the
34113379
value in the bucket used as the label is not included in the bucket,
3412-
which it labels. For example, in the original series the
3380+
which it labels. For example, in the original series the
34133381
bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
3414-
value in the resampled bucket with the label``2000-01-01 00:03:00``
3382+
value in the resampled bucket with the label``2000-01-01 00:03:00``
34153383
does not include 3 (if it did, the summed value would be 6, not 3).
3416-
To include this value close the right side of the bin interval as
3384+
To include this value close the right side of the bin interval as
34173385
illustrated in the example below this one.
34183386
34193387
>>> series.resample('3T', how='sum', label='right')
@@ -3424,7 +3392,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
34243392
34253393
Downsample the series into 3 minute bins as above, but close the right
34263394
side of the bin interval.
3427-
3395+
34283396
>>> series.resample('3T', how='sum', label='right', closed='right')
34293397
2000-01-01 00:00:00 0
34303398
2000-01-01 00:03:00 6
@@ -3453,7 +3421,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
34533421
2000-01-01 00:02:00 2
34543422
Freq: 30S, dtype: int64
34553423
3456-
Upsample the series into 30 second bins and fill the
3424+
Upsample the series into 30 second bins and fill the
34573425
``NaN`` values using the ``bfill`` method.
34583426
34593427
>>> series.resample('30S', fill_method='bfill')[0:5]
@@ -3468,7 +3436,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
34683436
34693437
>>> def custom_resampler(array_like):
34703438
... return np.sum(array_like)+5
3471-
3439+
34723440
>>> series.resample('3T', how=custom_resampler)
34733441
2000-01-01 00:00:00 8
34743442
2000-01-01 00:03:00 17

0 commit comments

Comments
 (0)