Skip to content

Commit 02be419

Browse files
keitakurita authored and TomAugspurger committed
BUG: incorrect handling of scipy.sparse.dok formats (#16197) (#16191)
(cherry picked from commit 1c0b632)
1 parent e346c66 commit 02be419

File tree

3 files changed

+27
-6
lines changed

3 files changed

+27
-6
lines changed

Diff for: doc/source/whatsnew/v0.20.2.txt

+1 -2
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ Groupby/Resample/Rolling
6666
Sparse
6767
^^^^^^
6868

69-
70-
69+
- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)
7170

7271
Reshaping
7372
^^^^^^^^^

Diff for: pandas/core/sparse/frame.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
190190
values = Series(data.data, index=data.row, copy=False)
191191
for col, rowvals in values.groupby(data.col):
192192
# get_blocks expects int32 row indices in sorted order
193+
rowvals = rowvals.sort_index()
193194
rows = rowvals.index.values.astype(np.int32)
194-
rows.sort()
195195
blocs, blens = get_blocks(rows)
196196

197197
sdict[columns[col]] = SparseSeries(

Diff for: pandas/tests/sparse/test_frame.py

+25 -3
Original file line numberDiff line numberDiff line change
@@ -1146,8 +1146,8 @@ def test_isnotnull(self):
11461146
tm.assert_frame_equal(res.to_dense(), exp)
11471147

11481148

1149-
@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
1150-
@pytest.mark.parametrize('columns', [None, list('cd')])
1149+
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
1150+
@pytest.mark.parametrize('columns', [None, list('def')])
11511151
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
11521152
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
11531153
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
@@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
11561156

11571157
# Make one ndarray and from it one sparse matrix, both to be used for
11581158
# constructing frames and comparing results
1159-
arr = np.eye(2, dtype=dtype)
1159+
arr = np.eye(3, dtype=dtype)
1160+
# GH 16179
1161+
arr[0, 1] = dtype(2)
11601162
try:
11611163
spm = spmatrix(arr)
11621164
assert spm.dtype == arr.dtype
@@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
12451247
assert sdf.to_coo().dtype == res_dtype
12461248

12471249

1250+
def test_from_scipy_correct_ordering(spmatrix):
1251+
# GH 16179
1252+
tm.skip_if_no_package('scipy')
1253+
1254+
arr = np.arange(1, 5).reshape(2, 2)
1255+
try:
1256+
spm = spmatrix(arr)
1257+
assert spm.dtype == arr.dtype
1258+
except (TypeError, AssertionError):
1259+
# If conversion to sparse fails for this spmatrix type and arr.dtype,
1260+
# then the combination is not currently supported in NumPy, so we
1261+
# can just skip testing it thoroughly
1262+
return
1263+
1264+
sdf = pd.SparseDataFrame(spm)
1265+
expected = pd.SparseDataFrame(arr)
1266+
tm.assert_sp_frame_equal(sdf, expected)
1267+
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
1268+
1269+
12481270
class TestSparseDataFrameArithmetic(object):
12491271

12501272
def test_numeric_op_scalar(self):

0 commit comments

Comments
 (0)