Skip to content

Commit 3cc4198

Browse files
sinhrks authored and jreback committed
BUG: SparseArray numeric ops misc fixes
Fixes the following 3 issues that occur on the current master. Author: sinhrks <[email protected]> Closes #12910 from sinhrks/sparse_ops and squashes the following commits: d63da47 [sinhrks] BUG: SparseArray misc fixes
1 parent a6d7698 commit 3cc4198

File tree

5 files changed

+97
-225
lines changed

5 files changed

+97
-225
lines changed

Diff for: doc/source/whatsnew/v0.18.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ These changes conform sparse handling to return the correct types and work to ma
114114
- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
115115
- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
116116
- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may result in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`)
117+
- Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`)
118+
- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`)
119+
- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`)
117120
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
118121
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
119122
- Bug in ``SparseSeries.reindex`` incorrectly handles ``fill_value`` (:issue:`12797`)

Diff for: pandas/sparse/array.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,7 @@ def wrapper(self, other):
5959

6060

6161
def _sparse_array_op(left, right, op, name):
62-
if np.isnan(left.fill_value):
63-
sparse_op = lambda a, b: _sparse_nanop(a, b, name)
64-
else:
65-
sparse_op = lambda a, b: _sparse_fillop(a, b, name)
62+
sparse_op = lambda a, b: _sparse_op(a, b, name)
6663

6764
if left.sp_index.equals(right.sp_index):
6865
result = op(left.sp_values, right.sp_values)
@@ -79,15 +76,7 @@ def _sparse_array_op(left, right, op, name):
7976
fill_value=fill_value)
8077

8178

82-
def _sparse_nanop(this, other, name):
83-
sparse_op = getattr(splib, 'sparse_nan%s' % name)
84-
result, result_index = sparse_op(this.sp_values, this.sp_index,
85-
other.sp_values, other.sp_index)
86-
87-
return result, result_index
88-
89-
90-
def _sparse_fillop(this, other, name):
79+
def _sparse_op(this, other, name):
9180
sparse_op = getattr(splib, 'sparse_%s' % name)
9281
result, result_index = sparse_op(this.sp_values, this.sp_index,
9382
this.fill_value, other.sp_values,

Diff for: pandas/sparse/tests/test_array.py

+63
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,69 @@ def test_fillna_overlap(self):
543543
tm.assert_sp_array_equal(res, exp)
544544

545545

546+
class TestSparseArrayArithmetic(tm.TestCase):
547+
548+
_multiprocess_can_split_ = True
549+
550+
def _check_numeric_ops(self, a, b, a_dense, b_dense):
551+
tm.assert_numpy_array_equal((a + b).to_dense(), a_dense + b_dense)
552+
tm.assert_numpy_array_equal((b + a).to_dense(), b_dense + a_dense)
553+
554+
tm.assert_numpy_array_equal((a - b).to_dense(), a_dense - b_dense)
555+
tm.assert_numpy_array_equal((b - a).to_dense(), b_dense - a_dense)
556+
557+
tm.assert_numpy_array_equal((a * b).to_dense(), a_dense * b_dense)
558+
tm.assert_numpy_array_equal((b * a).to_dense(), b_dense * a_dense)
559+
560+
tm.assert_numpy_array_equal((a / b).to_dense(), a_dense / b_dense)
561+
tm.assert_numpy_array_equal((b / a).to_dense(), b_dense / a_dense)
562+
563+
tm.assert_numpy_array_equal((a // b).to_dense(), a_dense // b_dense)
564+
tm.assert_numpy_array_equal((b // a).to_dense(), b_dense // a_dense)
565+
566+
tm.assert_numpy_array_equal((a % b).to_dense(), a_dense % b_dense)
567+
tm.assert_numpy_array_equal((b % a).to_dense(), b_dense % a_dense)
568+
569+
tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense)
570+
tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense)
571+
572+
def test_float_scalar(self):
573+
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
574+
575+
a = SparseArray(values)
576+
self._check_numeric_ops(a, 1, values, 1)
577+
self._check_numeric_ops(a, 0, values, 0)
578+
579+
a = SparseArray(values, fill_value=0)
580+
self._check_numeric_ops(a, 1, values, 1)
581+
self._check_numeric_ops(a, 0, values, 0)
582+
583+
a = SparseArray(values, fill_value=2)
584+
self._check_numeric_ops(a, 1, values, 1)
585+
self._check_numeric_ops(a, 0, values, 0)
586+
587+
def test_float_array(self):
588+
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
589+
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
590+
591+
a = SparseArray(values)
592+
b = SparseArray(rvalues)
593+
self._check_numeric_ops(a, b, values, rvalues)
594+
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
595+
596+
a = SparseArray(values, fill_value=0)
597+
b = SparseArray(rvalues)
598+
self._check_numeric_ops(a, b, values, rvalues)
599+
600+
a = SparseArray(values, fill_value=0)
601+
b = SparseArray(rvalues, fill_value=0)
602+
self._check_numeric_ops(a, b, values, rvalues)
603+
604+
a = SparseArray(values, fill_value=1)
605+
b = SparseArray(rvalues, fill_value=2)
606+
self._check_numeric_ops(a, b, values, rvalues)
607+
608+
546609
if __name__ == '__main__':
547610
import nose
548611
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

Diff for: pandas/sparse/tests/test_libsparse.py

+1-38
Original file line numberDiff line numberDiff line change
@@ -269,31 +269,6 @@ def test_to_int_index(self):
269269

270270

271271
class TestSparseOperators(tm.TestCase):
272-
def _nan_op_tests(self, sparse_op, python_op):
273-
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
274-
xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
275-
yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
276-
277-
xdindex = xindex.to_int_index()
278-
ydindex = yindex.to_int_index()
279-
280-
x = np.arange(xindex.npoints) * 10. + 1
281-
y = np.arange(yindex.npoints) * 100. + 1
282-
283-
result_block_vals, rb_index = sparse_op(x, xindex, y, yindex)
284-
result_int_vals, ri_index = sparse_op(x, xdindex, y, ydindex)
285-
286-
self.assertTrue(rb_index.to_int_index().equals(ri_index))
287-
assert_equal(result_block_vals, result_int_vals)
288-
289-
# check versus Series...
290-
xseries = Series(x, xdindex.indices)
291-
yseries = Series(y, ydindex.indices)
292-
series_result = python_op(xseries, yseries).valid()
293-
assert_equal(result_block_vals, series_result.values)
294-
assert_equal(result_int_vals, series_result.values)
295-
296-
check_cases(_check_case)
297272

298273
def _op_tests(self, sparse_op, python_op):
299274
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
@@ -337,16 +312,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
337312
check_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv']
338313

339314

340-
def make_nanoptestf(op):
341-
def f(self):
342-
sparse_op = getattr(splib, 'sparse_nan%s' % op)
343-
python_op = getattr(operator, op)
344-
self._nan_op_tests(sparse_op, python_op)
345-
346-
f.__name__ = 'test_nan%s' % op
347-
return f
348-
349-
350315
def make_optestf(op):
351316
def f(self):
352317
sparse_op = getattr(splib, 'sparse_%s' % op)
@@ -358,13 +323,11 @@ def f(self):
358323

359324

360325
for op in check_ops:
361-
f = make_nanoptestf(op)
362326
g = make_optestf(op)
363-
setattr(TestSparseOperators, f.__name__, f)
364327
setattr(TestSparseOperators, g.__name__, g)
365-
del f
366328
del g
367329

330+
368331
if __name__ == '__main__':
369332
import nose # noqa
370333
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)