Skip to content

Commit ca8a6ba

Browse files
committed
BUG: make inplace semantics of DataFrame.where consistent. #2230
1 parent 564175e commit ca8a6ba

File tree

3 files changed

+35
-16
lines changed

3 files changed

+35
-16
lines changed

Diff for: RELEASE.rst

+2
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ pandas 0.9.1
5757
- Improve performance of Series/DataFrame.diff (re: #2087)
5858
- Support unary ~ (__invert__) in DataFrame (#2110)
5959
- Turn off pandas-style tick locators and formatters (#2205)
60+
- DataFrame[DataFrame] uses DataFrame.where to compute masked frame (#2230)
6061

6162
**Bug fixes**
6263

@@ -100,6 +101,7 @@ pandas 0.9.1
100101
- Fix improper MultiIndex conversion issue when assigning
101102
e.g. DataFrame.index (#2200)
102103
- Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236)
104+
- Fix duplicate columns issue (#2218, #2219)
103105
104106
pandas 0.9.0
105107
============

Diff for: pandas/core/frame.py

100755 → 100644
+13-6
Original file line number | Diff line number | Diff line change
@@ -1884,7 +1884,7 @@ def _boolean_set(self, key, value):
18841884
if self._is_mixed_type:
18851885
raise ValueError('Cannot do boolean setting on mixed-type frame')
18861886

1887-
self.where(key, value, inplace=True)
1887+
self.where(-key, value, inplace=True)
18881888

18891889
def _set_item_multiple(self, keys, value):
18901890
if isinstance(value, DataFrame):
@@ -4872,7 +4872,6 @@ def where(self, cond, other=NA, inplace=False):
48724872
Return a DataFrame with the same shape as self and whose corresponding
48734873
entries are from self where cond is True and otherwise are from other.
48744874
4875-
48764875
Parameters
48774876
----------
48784877
cond: boolean DataFrame or array
@@ -4882,17 +4881,25 @@ def where(self, cond, other=NA, inplace=False):
48824881
-------
48834882
wh: DataFrame
48844883
"""
4885-
if not hasattr(cond,'shape'):
4886-
raise ValueError('where requires an ndarray like object for its condition')
4884+
if not hasattr(cond, 'shape'):
4885+
raise ValueError('where requires an ndarray like object for its '
4886+
'condition')
48874887

48884888
if isinstance(cond, np.ndarray):
48894889
if cond.shape != self.shape:
48904890
raise ValueError('Array onditional must be same shape as self')
48914891
cond = self._constructor(cond, index=self.index,
48924892
columns=self.columns)
4893+
48934894
if cond.shape != self.shape:
48944895
cond = cond.reindex(self.index, columns=self.columns)
4895-
cond = cond.fillna(False)
4896+
4897+
if inplace:
4898+
cond = -(cond.fillna(True).astype(bool))
4899+
else:
4900+
cond = cond.fillna(False).astype(bool)
4901+
elif inplace:
4902+
cond = -cond
48964903

48974904
if isinstance(other, DataFrame):
48984905
_, other = self.align(other, join='left', fill_value=NA)
@@ -4903,7 +4910,7 @@ def where(self, cond, other=NA, inplace=False):
49034910

49044911
rs = np.where(cond, self, other)
49054912
return self._constructor(rs, self.index, self.columns)
4906-
4913+
49074914
def mask(self, cond):
49084915
"""
49094916
Returns copy of self whose values are replaced with nan if the

Diff for: pandas/tests/test_frame.py

100755 → 100644
+20-10
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,8 @@ def test_getitem_boolean(self):
143143

144144
# test df[df >0] works
145145
bif = self.tsframe[self.tsframe > 0]
146-
bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns)
146+
bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan),
147+
index=self.tsframe.index,columns=self.tsframe.columns)
147148
self.assert_(isinstance(bif,DataFrame))
148149
self.assert_(bif.shape == self.tsframe.shape)
149150
assert_frame_equal(bif,bifw)
@@ -285,8 +286,8 @@ def test_setitem_boolean(self):
285286
assert_almost_equal(df.values, values)
286287

287288
# a df that needs alignment first
288-
df[df[:-1]<0] = 2
289-
np.putmask(values[:-1],values[:-1]<0,2)
289+
df[df[:-1] < 0] = 2
290+
np.putmask(values[:-1], values[:-1] < 0, 2)
290291
assert_almost_equal(df.values, values)
291292

292293
self.assertRaises(Exception, df.__setitem__, df * 0, 2)
@@ -5268,6 +5269,13 @@ def test_where(self):
52685269
self.assertRaises(ValueError, df.mask, True)
52695270
self.assertRaises(ValueError, df.mask, 0)
52705271

5272+
# where inplace
5273+
df = DataFrame(np.random.randn(5, 3))
5274+
5275+
expected = df.mask(df < 0)
5276+
df.where(df >= 0, np.nan, inplace=True)
5277+
assert_frame_equal(df, expected)
5278+
52715279
def test_mask(self):
52725280
df = DataFrame(np.random.randn(5, 3))
52735281
cond = df > 0
@@ -7232,13 +7240,15 @@ def test_xs_view(self):
72327240
def test_boolean_indexing(self):
72337241
idx = range(3)
72347242
cols = range(3)
7235-
df1 = DataFrame(index=idx, columns=cols, \
7236-
data=np.array([[0.0, 0.5, 1.0],
7237-
[1.5, 2.0, 2.5],
7238-
[3.0, 3.5, 4.0]], dtype=float))
7239-
df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols))))
7240-
7241-
expected = DataFrame(index=idx, columns=cols, \
7243+
df1 = DataFrame(index=idx, columns=cols,
7244+
data=np.array([[0.0, 0.5, 1.0],
7245+
[1.5, 2.0, 2.5],
7246+
[3.0, 3.5, 4.0]],
7247+
dtype=float))
7248+
df2 = DataFrame(index=idx, columns=cols,
7249+
data=np.ones((len(idx), len(cols))))
7250+
7251+
expected = DataFrame(index=idx, columns=cols,
72427252
data=np.array([[0.0, 0.5, 1.0],
72437253
[1.5, 2.0, -1],
72447254
[-1, -1, -1]], dtype=float))

0 commit comments

Comments
 (0)