Skip to content

BUG: incorrect handling of scipy.sparse.dok formats #16191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 11, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
@@ -66,8 +66,7 @@ Groupby/Resample/Rolling
Sparse
^^^^^^



- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)

Reshaping
^^^^^^^^^
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
@@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
values = Series(data.data, index=data.row, copy=False)
for col, rowvals in values.groupby(data.col):
# get_blocks expects int32 row indices in sorted order
rowvals = rowvals.sort_index()
rows = rowvals.index.values.astype(np.int32)
rows.sort()
blocs, blens = get_blocks(rows)

sdict[columns[col]] = SparseSeries(
28 changes: 25 additions & 3 deletions pandas/tests/sparse/test_frame.py
Original file line number Diff line number Diff line change
@@ -1146,8 +1146,8 @@ def test_isnotnull(self):
tm.assert_frame_equal(res.to_dense(), exp)


@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('cd')])
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
@@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):

# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(2, dtype=dtype)
arr = np.eye(3, dtype=dtype)
# GH 16179
arr[0, 1] = dtype(2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
@@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
assert sdf.to_coo().dtype == res_dtype


def test_from_scipy_correct_ordering(spmatrix):
    """Check that a SparseDataFrame built from a sparse matrix keeps its values.

    A fully populated 2x2 integer array is converted with ``spmatrix`` and
    the frame built from that sparse matrix is compared against the frame
    built directly from the dense array.

    Parameters
    ----------
    spmatrix : callable
        Builds a sparse matrix from an ndarray.  Supplied by the test
        runner -- presumably a fixture over the scipy.sparse matrix
        classes; confirm against the fixture definition.
    """
    # GH 16179
    tm.skip_if_no_package('scipy')

    dense = np.arange(1, 5).reshape(2, 2)
    try:
        sparse = spmatrix(dense)
        assert sparse.dtype == dense.dtype
    except (TypeError, AssertionError):
        # The conversion either raised or did not preserve the dtype, so
        # this spmatrix type / dtype combination is treated as unsupported
        # and there is nothing further to compare.
        return

    from_sparse = pd.SparseDataFrame(sparse)
    from_dense = pd.SparseDataFrame(dense)

    # Compare both the sparse representations and their dense equivalents.
    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())


class TestSparseDataFrameArithmetic(object):

def test_numeric_op_scalar(self):