
SparseSeries accepts scipy.sparse.spmatrix in constructor #16617


Closed
wants to merge 9 commits into from
19 changes: 19 additions & 0 deletions doc/source/sparse.rst
@@ -213,9 +213,28 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you

sdf.to_coo()

.. _sparse.scipysparse_series:

SparseSeries
~~~~~~~~~~~~

.. versionadded:: 0.20.2

``SparseSeries`` and ``SparseArray`` can be constructed from ``scipy.sparse.spmatrix`` objects of shape ``(1, n)`` or ``(n, 1)``.
SciPy sparse matrices can also be assigned directly to a ``SparseDataFrame`` with an index.

.. ipython:: python

sa = pd.SparseSeries(sp_arr[:, 5])
sa

sdf['x'] = sa
sdf['y'] = sp_arr[:, 6]
sdf[['z', 'w']] = sp_arr[:, [7, 8]]
sdf.iloc[:, -5:]
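
As a rough, self-contained sketch of the constructor behaviour described above (it assumes SciPy is installed and this branch's changes; ``sp_mat`` and the column labels are illustrative, not part of the docs):

import numpy as np
import pandas as pd
from scipy import sparse

sp_mat = sparse.random(10, 4, density=0.3, format='csr')  # illustrative matrix

# 2d spmatrix -> SparseDataFrame (existing 0.20.0 behaviour)
sdf = pd.SparseDataFrame(sp_mat)

# 1d slices construct SparseSeries/SparseArray directly (this PR)
ss = pd.SparseSeries(sp_mat[:, 0])   # (n, 1) slice; fill_value defaults to 0
sa = pd.SparseArray(sp_mat[0, :])    # (1, n) slice works as well

# sparse matrices can be assigned as columns of an indexed SparseDataFrame
sdf['x'] = sp_mat[:, 1]
sdf[['y', 'z']] = sp_mat[:, [2, 3]]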

The interface below is deprecated.
Review comment (Contributor): say that this is deprecated in 0.21.0


.. versionadded:: 0.16.0

A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.20.2.txt
@@ -25,6 +25,9 @@ Enhancements
has been added to return the group order (:issue:`11642`); see
:ref:`here <groupby.ngroup>`.


- ``SparseSeries`` and ``SparseArray`` now support a 1d ``scipy.sparse.spmatrix`` in the constructor. Additionally, columns of ``scipy.sparse.spmatrix`` can be assigned to a ``SparseDataFrame``; see :ref:`here <sparse.scipysparse_series>`. (:issue:`15634`)
Review comment (Contributor): will be for 0.21.0


.. _whatsnew_0202.performance:

Performance Improvements
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
@@ -554,7 +554,7 @@ def can_do_equal_len():
setter(item, v)

# we have an equal len ndarray/convertible to our labels
elif np.array(value).ndim == 2:
elif np.asanyarray(value).ndim == 2:

# note that this coerces the dtype if we are mixed
# GH 7551
16 changes: 13 additions & 3 deletions pandas/core/internals.py
@@ -1634,7 +1634,7 @@ class ComplexBlock(FloatOrComplexBlock):

def _can_hold_element(self, element):
if is_list_like(element):
element = np.array(element)
element = np.asanyarray(element)
return issubclass(element.dtype.type,
(np.floating, np.integer, np.complexfloating))
return (isinstance(element,
@@ -1658,7 +1658,7 @@ class IntBlock(NumericBlock):

def _can_hold_element(self, element):
if is_list_like(element):
element = np.array(element)
element = np.asanyarray(element)
tipo = element.dtype.type
return (issubclass(tipo, np.integer) and
not issubclass(tipo, (np.datetime64, np.timedelta64)))
@@ -1805,7 +1805,7 @@ class BoolBlock(NumericBlock):

def _can_hold_element(self, element):
if is_list_like(element):
element = np.array(element)
element = np.asanyarray(element)
return issubclass(element.dtype.type, np.integer)
return isinstance(element, (int, bool))

@@ -2571,6 +2571,16 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
return self.make_block_same_class(values=values,
placement=self.mgr_locs)

def _can_hold_element(self, element):
element = np.asanyarray(element)
return np.issubdtype(element.dtype, self.sp_values.dtype)

def _try_cast(self, element):
try:
return np.asarray(element, dtype=self.sp_values.dtype)
except ValueError:
return element

def __len__(self):
try:
return self.sp_index.length
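As a hedged illustration of the new ``SparseBlock._can_hold_element`` check above: an incoming element is coerced with ``np.asanyarray`` and its dtype is tested against the block's ``sp_values`` dtype via ``np.issubdtype``. A minimal sketch (``sp_values_dtype`` is a stand-in for the real block attribute):

import numpy as np

sp_values_dtype = np.dtype('float64')   # stand-in for self.sp_values.dtype

np.issubdtype(np.asanyarray([1.0, 2.0]).dtype, sp_values_dtype)   # True: float64 values fit the block
np.issubdtype(np.asanyarray([1, 2]).dtype, sp_values_dtype)       # False: int64 is not a sub-dtype of float64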
29 changes: 21 additions & 8 deletions pandas/core/sparse/array.py
@@ -23,7 +23,8 @@
is_bool_dtype,
is_list_like,
is_string_dtype,
is_scalar, is_dtype_equal)
is_scalar, is_dtype_equal,
is_scipy_sparse)
from pandas.core.dtypes.cast import (
maybe_convert_platform, maybe_promote,
astype_nansafe, find_common_type)
@@ -164,11 +165,13 @@ class SparseArray(PandasObject, np.ndarray):

Parameters
----------
data : {array-like (1-D), Series, SparseSeries, dict}
data : {array-like (1-D), Series, SparseSeries, dict, \
scipy.sparse.spmatrix}
kind : {'block', 'integer'}
fill_value : float
Code for missing value. Defaults depends on dtype.
0 for int dtype, False for bool dtype, and NaN for other dtypes
0 for int dtype or scipy sparse matrix, False for bool dtype, and NaN
for other dtypes
sparse_index : {BlockIndex, IntIndex}, optional
Only if you have one. Mainly used internally

@@ -197,17 +200,27 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
values.fill(data)
data = values

if isinstance(data, ABCSparseSeries):
data = data.values
is_sparse_array = isinstance(data, SparseArray)

if dtype is not None:
dtype = np.dtype(dtype)

if is_sparse_array:
if isinstance(data, ABCSparseSeries):
data = data.values

if isinstance(data, SparseArray):
sparse_index = data.sp_index
values = data.sp_values
fill_value = data.fill_value
elif is_scipy_sparse(data):
if not any(ax == 1 for ax in data.shape):
raise ValueError('Need 1D sparse matrix shaped '
'(n, 1) or (1, n)')
coo = data.tocoo()
values = coo.data
indices = coo.row if coo.shape[0] != 1 else coo.col
sparse_index = _make_index(max(coo.shape), indices, kind)
# SciPy Sparse matrices imply missing value = 0
if fill_value is None:
fill_value = 0
else:
# array-like
if sparse_index is None:
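A minimal sketch of the spmatrix branch added to ``SparseArray.__new__`` above, run outside pandas so the intermediate values are visible (``spm`` is an illustrative matrix; pandas then feeds ``indices`` and ``max(coo.shape)`` to its internal ``_make_index``):

import numpy as np
from scipy import sparse

spm = sparse.csr_matrix(np.array([[0., 0., 3., 0., 5.]])).T   # shape (5, 1)

if not any(ax == 1 for ax in spm.shape):
    raise ValueError('Need 1D sparse matrix shaped (n, 1) or (1, n)')
coo = spm.tocoo()
values = coo.data                                     # array([3., 5.])
indices = coo.row if coo.shape[0] != 1 else coo.col   # array([2, 4])
length = max(coo.shape)                               # 5
fill_value = 0                                        # scipy sparse implies missing value == 0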
9 changes: 9 additions & 0 deletions pandas/core/sparse/frame.py
@@ -433,6 +433,15 @@ def __getitem__(self, key):
else:
return self._get_item_cache(key)

def __setitem__(self, key, value):
if is_scipy_sparse(value):
if any(ax == 1 for ax in value.shape): # 1d spmatrix
value = SparseArray(value, fill_value=self._default_fill_value)
else:
# 2d; make it iterable
value = list(value.tocsc().T)
Review comment (Contributor): does this materialize?

super().__setitem__(key, value)
Review comment (Contributor): use the fully qualified call


@Appender(DataFrame.get_value.__doc__, indents=0)
def get_value(self, index, col, takeable=False):
if takeable is True:
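A small usage sketch of the ``__setitem__`` dispatch above (assuming SciPy and this branch; labels are illustrative). A 1d matrix is wrapped in a ``SparseArray`` with the frame's default fill value; a 2d matrix is split column-wise via ``tocsc().T``, and since iterating a sparse matrix yields ``(1, n)`` sparse rows, that split should not densify the data:

import numpy as np
import pandas as pd
from scipy import sparse

sdf = pd.SparseDataFrame(np.zeros((4, 2)), columns=['a', 'b'])

col = sparse.csr_matrix(np.arange(4.)).T        # (4, 1): taken through the SparseArray branch
sdf['c'] = col

block = sparse.eye(4, format='csc')[:, :2]      # (4, 2): split into per-column (1, n) matrices
sdf[['d', 'e']] = block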
14 changes: 12 additions & 2 deletions pandas/core/sparse/series.py
@@ -9,7 +9,7 @@
import warnings

from pandas.core.dtypes.missing import isnull, notnull
from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.common import is_scalar, is_scipy_sparse
from pandas.core.common import _values_from_object, _maybe_match_name

from pandas.compat.numpy import function as nv
@@ -90,7 +90,7 @@ class SparseSeries(Series):

Parameters
----------
data : {array-like, Series, SparseSeries, dict}
data : {array-like, Series, SparseSeries, dict, scipy.sparse.spmatrix}
kind : {'block', 'integer'}
fill_value : float
Code for missing value. Defaults depends on dtype.
@@ -128,6 +128,10 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block',
if isinstance(data, Series) and name is None:
name = data.name

if is_scipy_sparse(data):
data = SparseArray(data, dtype=dtype, kind=kind,
fill_value=fill_value)

if isinstance(data, SparseArray):
if index is not None:
assert (len(index) == len(data))
@@ -722,6 +726,9 @@ def combine_first(self, other):

def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
"""
DEPRECATED; instead, make a SparseSeries with a two-level index,
unstack it, then use .to_coo() on the resulting SparseDataFrame.
Review comment (Contributor): use the deprecated sphinx directive (I think we are changing these all over)


Create a scipy.sparse.coo_matrix from a SparseSeries with MultiIndex.

Use row_levels and column_levels to determine the row and column
@@ -779,6 +786,9 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
@classmethod
def from_coo(cls, A, dense_index=False):
"""
DEPRECATED; instead, pass 1d scipy.sparse matrices directly into the
SparseSeries constructor, and 2d matrices into the SparseDataFrame constructor.
Review comment (Contributor): same


Create a SparseSeries from a scipy.sparse.coo_matrix.

.. versionadded:: 0.16.0
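To make the deprecation notes above concrete, a hedged before/after sketch (assuming SciPy and this branch; the COO data is made up):

import numpy as np
import pandas as pd
from scipy import sparse

coo = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4))

# Deprecated path: round-trip through a MultiIndexed SparseSeries.
ss_multi = pd.SparseSeries.from_coo(coo)                         # index is (row, col) pairs
coo_again = ss_multi.to_coo(row_levels=(0,), column_levels=(1,))

# Proposed replacement: 1d matrices go straight into SparseSeries,
# 2d matrices into SparseDataFrame.
ss = pd.SparseSeries(coo.tocsr()[0, :])                          # one row as a (1, n) matrix
sdf = pd.SparseDataFrame(coo)
coo_again = sdf.to_coo()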
18 changes: 18 additions & 0 deletions pandas/tests/sparse/test_array.py
@@ -105,6 +105,24 @@ def test_constructor_spindex_dtype(self):
assert arr.dtype == np.int64
assert arr.fill_value == 0

def test_constructor_spmatrix(self):
# GH-15634
tm.skip_if_no_package('scipy')
from scipy.sparse import csr_matrix

spm = csr_matrix(np.arange(5))

arr = SparseArray(spm)
assert arr.dtype == spm.dtype
assert arr.fill_value == 0

arr = SparseArray(spm, kind='block', dtype=float, fill_value=np.nan)
assert arr.dtype == float
assert np.isnan(arr.fill_value)

tm.assert_raises_regex(ValueError, '1D',
lambda: SparseArray(csr_matrix(np.eye(3))))

def test_sparseseries_roundtrip(self):
# GH 13999
for kind in ['integer', 'block']:
27 changes: 27 additions & 0 deletions pandas/tests/sparse/test_frame.py
@@ -540,6 +540,33 @@ def test_setitem_array(self):
self.frame['F'].reindex(index),
check_names=False)

def test_setitem_spmatrix(self):
# GH-15634
tm.skip_if_no_package('scipy')
from scipy.sparse import csr_matrix

sdf = self.frame.copy(False)

# 1d -- column
spm = csr_matrix(np.arange(len(sdf))).T
sdf['X'] = spm
assert (sdf[['X']].to_coo() != spm.tocoo()).nnz == 0

Review comment (Contributor): this comparison on the scipy side is fine, but also let's compare with assert_sparse_series/frame_equal

# 1d -- existing column
sdf['A'] = spm.T
assert (sdf[['A']].to_coo() != spm.tocoo()).nnz == 0

# 1d row -- changing series contents not yet supported
spm = csr_matrix(np.arange(sdf.shape[1])).astype(float)
idx = np.r_[[False, True], np.full(sdf.shape[0] - 2, False)]
tm.assert_raises_regex(TypeError, 'assignment',
lambda: sdf.__setitem__(idx, spm))

# 2d -- 2 columns
spm = csr_matrix(np.eye(len(sdf))[:, :2])
sdf[['X', 'A']] = spm
assert (sdf[['X', 'A']].to_coo() != spm.tocoo()).nnz == 0

def test_delitem(self):
A = self.frame['A']
C = self.frame['C']
18 changes: 18 additions & 0 deletions pandas/tests/sparse/test_series.py
@@ -142,6 +142,24 @@ def test_constructor_preserve_attr(self):
assert s.dtype == np.int64
assert s.fill_value == 0

def test_constructor_spmatrix(self):
# GH-15634
tm.skip_if_no_package('scipy')
from scipy.sparse import csr_matrix

spm = csr_matrix(np.eye(5)[:, 2])

arr = SparseSeries(spm)
assert arr.dtype == spm.dtype
assert arr.fill_value == 0

arr = SparseSeries(spm, kind='block', dtype=float, fill_value=np.nan)
assert arr.dtype == float
assert np.isnan(arr.fill_value)

tm.assert_raises_regex(ValueError, '1D',
lambda: SparseSeries(csr_matrix(np.eye(3))))

def test_series_density(self):
# GH2803
ts = Series(np.random.randn(10))