-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
SparseSeries accepts scipy.sparse.spmatrix in constructor #16617
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
639fc6f
690a09f
9d6d2fe
3a12685
6bc8c8a
97da8bd
293bb47
47ef68a
ef03e73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,9 @@ Enhancements | |
has been added to return the group order (:issue:`11642`); see | ||
:ref:`here <groupby.ngroup>`. | ||
|
||
|
||
- ``SparseSeries`` and ``SparseArray`` now support 1d ``scipy.sparse.spmatrix`` in constructor. Additionally, ``SparseDataFrame`` can be assigned columns of ``scipy.sparse.spmatrix``; see :ref:`here <sparse.scipysparse_series>`. (:issue:`15634`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will be for 0.21.0. |
||
|
||
.. _whatsnew_0202.performance: | ||
|
||
Performance Improvements | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -433,6 +433,15 @@ def __getitem__(self, key): | |
else: | ||
return self._get_item_cache(key) | ||
|
||
def __setitem__(self, key, value): | ||
if is_scipy_sparse(value): | ||
if any(ax == 1 for ax in value.shape): # 1d spmatrix | ||
value = SparseArray(value, fill_value=self._default_fill_value) | ||
else: | ||
# 2d; make it iterable | ||
value = list(value.tocsc().T) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this materialize? |
||
super().__setitem__(key, value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use the fully qualified call |
||
|
||
@Appender(DataFrame.get_value.__doc__, indents=0) | ||
def get_value(self, index, col, takeable=False): | ||
if takeable is True: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
import warnings | ||
|
||
from pandas.core.dtypes.missing import isnull, notnull | ||
from pandas.core.dtypes.common import is_scalar | ||
from pandas.core.dtypes.common import is_scalar, is_scipy_sparse | ||
from pandas.core.common import _values_from_object, _maybe_match_name | ||
|
||
from pandas.compat.numpy import function as nv | ||
|
@@ -90,7 +90,7 @@ class SparseSeries(Series): | |
|
||
Parameters | ||
---------- | ||
data : {array-like, Series, SparseSeries, dict} | ||
data : {array-like, Series, SparseSeries, dict, scipy.sparse.spmatrix} | ||
kind : {'block', 'integer'} | ||
fill_value : float | ||
Code for missing value. Defaults depends on dtype. | ||
|
@@ -128,6 +128,10 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', | |
if isinstance(data, Series) and name is None: | ||
name = data.name | ||
|
||
if is_scipy_sparse(data): | ||
data = SparseArray(data, dtype=dtype, kind=kind, | ||
fill_value=fill_value) | ||
|
||
if isinstance(data, SparseArray): | ||
if index is not None: | ||
assert (len(index) == len(data)) | ||
|
@@ -722,6 +726,9 @@ def combine_first(self, other): | |
|
||
def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False): | ||
""" | ||
DEPRECATED; instead, make a SparseSeries with a two-level index, | ||
unstack it, then use .to_coo() on the resulting SparseDataFrame. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use the deprecated sphinx directive (I think we are changing these all over) |
||
|
||
Create a scipy.sparse.coo_matrix from a SparseSeries with MultiIndex. | ||
|
||
Use row_levels and column_levels to determine the row and column | ||
|
@@ -779,6 +786,9 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False): | |
@classmethod | ||
def from_coo(cls, A, dense_index=False): | ||
""" | ||
DEPRECATED; instead, pass 1d scipy.sparse matrices directly into | ||
SparseSeries constructor, and 2d into SparseDataFrame constructor. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
|
||
Create a SparseSeries from a scipy.sparse.coo_matrix. | ||
|
||
.. versionadded:: 0.16.0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -540,6 +540,33 @@ def test_setitem_array(self): | |
self.frame['F'].reindex(index), | ||
check_names=False) | ||
|
||
def test_setitem_spmatrix(self): | ||
# GH-15634 | ||
tm.skip_if_no_package('scipy') | ||
from scipy.sparse import csr_matrix | ||
|
||
sdf = self.frame.copy(False) | ||
|
||
# 1d -- column | ||
spm = csr_matrix(np.arange(len(sdf))).T | ||
sdf['X'] = spm | ||
assert (sdf[['X']].to_coo() != spm.tocoo()).nnz == 0 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This comparison on the scipy side is fine, but let's also compare with assert_sparse_series/frame_equal. |
||
# 1d -- existing column | ||
sdf['A'] = spm.T | ||
assert (sdf[['X']].to_coo() != spm.tocoo()).nnz == 0 | ||
|
||
# 1d row -- changing series contents not yet supported | ||
spm = csr_matrix(np.arange(sdf.shape[1])).astype(float) | ||
idx = np.r_[[False, True], np.full(sdf.shape[0] - 2, False)] | ||
tm.assert_raises_regex(TypeError, 'assignment', | ||
lambda: sdf.__setitem__(idx, spm)) | ||
|
||
# 2d -- 2 columns | ||
spm = csr_matrix(np.eye(len(sdf))[:, :2]) | ||
sdf[['X', 'A']] = spm | ||
assert (sdf[['X', 'A']].to_coo() != spm.tocoo()).nnz == 0 | ||
|
||
def test_delitem(self): | ||
A = self.frame['A'] | ||
C = self.frame['C'] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
say that this is deprecated in 0.21.0