 import operator
 import weakref
 import gc
+import inspect
 import numpy as np
 import pandas.lib as lib
 
 from pandas.core.common import (isnull, notnull, is_list_like,
                                 _values_from_object, _maybe_promote,
                                 _maybe_box_datetimelike, ABCSeries,
-                                SettingWithCopyError, SettingWithCopyWarning,
-                                AbstractMethodError)
+                                AbstractMethodError, SettingWithCopyError)
 import pandas.core.nanops as nanops
 from pandas.util.decorators import Appender, Substitution, deprecate_kwarg
 from pandas.core import config
@@ -79,12 +79,13 @@ class NDFrame(PandasObject):
     copy : boolean, default False
     """
     _internal_names = ['_data', '_cacher', '_item_cache', '_cache',
-                       'is_copy', '_subtyp', '_index',
+                       'is_copy', '_subtyp', '_index', '_allow_copy_on_write',
                        '_default_kind', '_default_fill_value', '_metadata',
                        '__array_struct__', '__array_interface__']
     _internal_names_set = set(_internal_names)
     _accessors = frozenset([])
     _metadata = []
+    _allow_copy_on_write = True
     is_copy = None
 
     def __init__(self, data, axes=None, copy=False, dtype=None,
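The new class-level `_allow_copy_on_write` attribute makes the behaviour switchable. A minimal sketch of what the flag controls, assuming this branch is installed (the attribute is private, so this is for illustration only, not a supported API):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # with the default (True), setting a value consults the new
    # copy-on-write check added below
    sub = df[['a']]

    # flipping the private flag on the class skips that check entirely,
    # so assignments go straight to the plain set path
    pd.DataFrame._allow_copy_on_write = False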
@@ -1173,7 +1174,7 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
             pass
 
         if verify_is_copy:
-            self._check_setitem_copy(stacklevel=5, t='referant')
+            self._check_copy_on_write()
 
         if clear:
             self._clear_item_cache()
@@ -1202,6 +1203,8 @@ def _slice(self, slobj, axis=0, kind=None):
         return result
 
     def _set_item(self, key, value):
+
+        self._check_copy_on_write()
         self._data.set(key, value)
         self._clear_item_cache()
@@ -1214,10 +1217,54 @@ def _set_is_copy(self, ref=None, copy=True):
         else:
             self.is_copy = None
 
+    def _check_copy_on_write(self):
+
+        # we could have a copy-on-write scenario
+        if self.is_copy and self._allow_copy_on_write:
+
+            # we have an exception
+            if isinstance(self.is_copy, Exception):
+                raise self.is_copy
+
+            def get_names_for_obj(__really_unused_name__342424__):
+                """Returns all named references for self"""
+
+                removals = set(["__really_unused_name__342424__",
+                                "__really_unused_name__xxxxx__", "self"])
+                refs = gc.get_referrers(__really_unused_name__342424__)
+
+                names = []
+                for ref in refs:
+                    if inspect.isframe(ref):
+                        for name, __really_unused_name__xxxxx__ in ref.f_locals.items():
+                            if __really_unused_name__xxxxx__ is __really_unused_name__342424__:
+                                names.append(name)
+
+                for name, __really_unused_name__xxxxx__ in globals().items():
+                    if __really_unused_name__xxxxx__ is __really_unused_name__342424__:
+                        names.append(name)
+
+                return set(names) - removals
+
+            # collect garbage
+            # if we don't have references, then we have a reassignment case
+            # e.g. df = df.ix[....]; since the reference is gone
+            # we can just copy and be done
+
+            # otherwise we have chained indexing, raise an error
+            gc.collect(2)
+            if self.is_copy() is not None:
+                names = get_names_for_obj(self)
+                if not len(names):
+                    raise SettingWithCopyError("chained indexing detected, you can fix this ......")
+
+            # provide copy-on-write
+            self._data = self._data.copy()
+            self.is_copy = None
+
     def _check_is_chained_assignment_possible(self):
         """
         check if we are a view, have a cacher, and are of mixed type
-        if so, then force a setitem_copy check
+        if so, then force a copy_on_write check
 
         should be called just near setting a value
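Taken together with the `_set_item` hook above, the intended user-visible behaviour is roughly the following; a sketch against this branch, so exact messages and triggering paths may differ:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'count': np.arange(9), 'group': 'b'})

    # reassignment: the original `df` loses its last named reference,
    # so the slice simply copies its data on the first write
    df = df.iloc[0:5]
    df['group'] = 'a'

    # chained indexing: the parent is still reachable and the
    # intermediate object has no name of its own, so this is expected
    # to raise SettingWithCopyError under this branch
    df2 = pd.DataFrame({'count': np.arange(9), 'group': 'b'})
    df2.iloc[0:5]['group'] = 'a'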
@@ -1227,91 +1274,12 @@ def _check_is_chained_assignment_possible(self):
         if self._is_view and self._is_cached:
             ref = self._get_cacher()
             if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t='referant', force=True)
+                self._check_copy_on_write()
             return True
         elif self.is_copy:
-            self._check_setitem_copy(stacklevel=4, t='referant')
+            self._check_copy_on_write()
         return False
 
-    def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
-        """
-
-        Parameters
-        ----------
-        stacklevel : integer, default 4
-           the level to show of the stack when the error is output
-        t : string, the type of setting error
-        force : boolean, default False
-           if True, then force showing an error
-
-        validate if we are doing a settitem on a chained copy.
-
-        If you call this function, be sure to set the stacklevel such that the
-        user will see the error *at the level of setting*
-
-        It is technically possible to figure out that we are setting on
-        a copy even WITH a multi-dtyped pandas object. In other words, some blocks
-        may be views while other are not. Currently _is_view will ALWAYS return False
-        for multi-blocks to avoid having to handle this case.
-
-        df = DataFrame(np.arange(0,9), columns=['count'])
-        df['group'] = 'b'
-
-        # this technically need not raise SettingWithCopy if both are view (which is not
-        # generally guaranteed but is usually True
-        # however, this is in general not a good practice and we recommend using .loc
-        df.iloc[0:5]['group'] = 'a'
-
-        """
-
-        if force or self.is_copy:
-
-            value = config.get_option('mode.chained_assignment')
-            if value is None:
-                return
-
-            # see if the copy is not actually refererd; if so, then disolve
-            # the copy weakref
-            try:
-                gc.collect(2)
-                if not gc.get_referents(self.is_copy()):
-                    self.is_copy = None
-                    return
-            except:
-                pass
-
-            # we might be a false positive
-            try:
-                if self.is_copy().shape == self.shape:
-                    self.is_copy = None
-                    return
-            except:
-                pass
-
-            # a custom message
-            if isinstance(self.is_copy, string_types):
-                t = self.is_copy
-
-            elif t == 'referant':
-                t = ("\n"
-                     "A value is trying to be set on a copy of a slice from a "
-                     "DataFrame\n\n"
-                     "See the caveats in the documentation: "
-                     "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
-
-            else:
-                t = ("\n"
-                     "A value is trying to be set on a copy of a slice from a "
-                     "DataFrame.\n"
-                     "Try using .loc[row_indexer,col_indexer] = value instead\n\n"
-                     "See the caveats in the documentation: "
-                     "http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
-
-            if value == 'raise':
-                raise SettingWithCopyError(t)
-            elif value == 'warn':
-                warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)
-
     def __delitem__(self, key):
         """
         Delete item
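For comparison, the `_check_setitem_copy` machinery removed above was driven by the existing `mode.chained_assignment` option; on mainline pandas (not this branch) its behaviour is selected like this:

    import pandas as pd

    pd.set_option('mode.chained_assignment', 'warn')   # default: emit SettingWithCopyWarning
    pd.set_option('mode.chained_assignment', 'raise')  # escalate to SettingWithCopyError
    pd.set_option('mode.chained_assignment', None)     # disable the check entirely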
@@ -3376,11 +3344,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
         For frequencies that evenly subdivide 1 day, the "origin" of the
         aggregated intervals. For example, for '5min' frequency, base could
         range from 0 through 4. Defaults to 0
- 
+
 
         Examples
         --------
- 
+
         Start by creating a series with 9 one minute timestamps.
 
         >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
@@ -3409,11 +3377,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
         Downsample the series into 3 minute bins as above, but label each
         bin using the right edge instead of the left. Please note that the
         value in the bucket used as the label is not included in the bucket,
-        which it labels. For example, in the original series the 
+        which it labels. For example, in the original series the
         bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
-        value in the resampled bucket with the label``2000-01-01 00:03:00`` 
+        value in the resampled bucket with the label``2000-01-01 00:03:00``
         does not include 3 (if it did, the summed value would be 6, not 3).
-        To include this value close the right side of the bin interval as 
+        To include this value close the right side of the bin interval as
         illustrated in the example below this one.
 
         >>> series.resample('3T', how='sum', label='right')
@@ -3424,7 +3392,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
 
         Downsample the series into 3 minute bins as above, but close the right
         side of the bin interval.
- 
+
         >>> series.resample('3T', how='sum', label='right', closed='right')
         2000-01-01 00:00:00     0
         2000-01-01 00:03:00     6
@@ -3453,7 +3421,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
         2000-01-01 00:02:00    2
         Freq: 30S, dtype: int64
 
-        Upsample the series into 30 second bins and fill the 
+        Upsample the series into 30 second bins and fill the
         ``NaN`` values using the ``bfill`` method.
 
         >>> series.resample('30S', fill_method='bfill')[0:5]
@@ -3468,7 +3436,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None,
 
         >>> def custom_resampler(array_like):
         ...     return np.sum(array_like)+5
- 
+
         >>> series.resample('3T', how=custom_resampler)
         2000-01-01 00:00:00     8
         2000-01-01 00:03:00    17