Skip to content

Commit a67ae98

Browse files
committed
Deprecate custom CV shims in documentation and code. Refs #11.
1 parent d3fb586 commit a67ae98

File tree

4 files changed

+60
-15
lines changed

4 files changed

+60
-15
lines changed

README.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ This module provides a bridge between `Scikit-Learn <http://scikit-learn.org/sta
77
In particular, it provides:
88

99
1. A way to map ``DataFrame`` columns to transformations, which are later recombined into features.
10-
2. A way to cross-validate a pipeline that takes a pandas ``DataFrame`` as input.
10+
2. A compatibility shim for old ``scikit-learn`` versions to cross-validate a pipeline that takes a pandas ``DataFrame`` as input. This is only needed for ``scikit-learn<0.16.0`` (see `#11 <https://github.com/paulgb/sklearn-pandas/issues/11>`__ for details). It is deprecated and will likely be dropped in ``sklearn-pandas==2.0``.
1111

1212
Installation
1313
------------
@@ -174,7 +174,7 @@ The stacking of the sparse features is done without ever densifying them.
174174
Cross-Validation
175175
----------------
176176

177-
Now that we can combine features from pandas DataFrames, we may want to use cross-validation to see whether our model works. Scikit-learn provides features for cross-validation, but they expect numpy data structures and won't work with ``DataFrameMapper``.
177+
Now that we can combine features from pandas DataFrames, we may want to use cross-validation to see whether our model works. ``scikit-learn<0.16.0`` provided features for cross-validation, but they expected numpy data structures and didn't work with ``DataFrameMapper``.
178178

179179
To get around this, sklearn-pandas provides a wrapper on sklearn's ``cross_val_score`` function which passes a pandas DataFrame to the estimator rather than a numpy array::
180180

@@ -190,6 +190,12 @@ Sklearn-pandas' ``cross_val_score`` function provides exactly the same interface
190190
Changelog
191191
---------
192192

193+
Development
194+
***********
195+
196+
* Deprecate custom cross-validation shim classes.
197+
198+
193199
1.1.0 (2015-12-06)
194200
*******************
195201

sklearn_pandas/cross_validation.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,24 @@
1+
import warnings
12
from sklearn import cross_validation
23
from sklearn import grid_search
34

5+
# Warning text passed to warnings.warn() by each deprecated shim in this
# module (cross_val_score, GridSearchCV, RandomizedSearchCV).
deprecation_msg = '''
6+
Custom cross-validation compatibility shims are no longer needed for
7+
scikit-learn>=0.16.0 and will be dropped in sklearn-pandas==2.0.
8+
'''
9+
410

511
def cross_val_score(model, X, *args, **kwargs):
    """Deprecated shim around ``sklearn.cross_validation.cross_val_score``.

    Issues a ``DeprecationWarning``, wraps ``X`` in a ``DataWrapper`` and
    forwards ``model``, the wrapped ``X`` and all remaining positional and
    keyword arguments to scikit-learn's ``cross_val_score``, returning
    whatever that call returns.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this wrapper, so users can see which of their calls to migrate.
    warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
    X = DataWrapper(X)
    return cross_validation.cross_val_score(model, X, *args, **kwargs)
815

916

1017
class GridSearchCV(grid_search.GridSearchCV):
18+
def __init__(self, *args, **kwargs):
19+
warnings.warn(deprecation_msg, DeprecationWarning)
20+
super(GridSearchCV, self).__init__(*args, **kwargs)
21+
1122
def fit(self, X, *params, **kwparams):
1223
return super(GridSearchCV, self).fit(DataWrapper(X), *params, **kwparams)
1324

@@ -17,6 +28,10 @@ def predict(self, X, *params, **kwparams):
1728

1829
try:
1930
class RandomizedSearchCV(grid_search.RandomizedSearchCV):
31+
def __init__(self, *args, **kwargs):
32+
warnings.warn(deprecation_msg, DeprecationWarning)
33+
super(RandomizedSearchCV, self).__init__(*args, **kwargs)
34+
2035
def fit(self, X, *params, **kwparams):
2136
return super(RandomizedSearchCV, self).fit(DataWrapper(X), *params, **kwparams)
2237

tests/test_dataframe_mapper.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
from pkg_resources import parse_version
23

34
# In py3, mock is included with the unittest standard library
45
# In py2, it's a separate package
@@ -10,6 +11,8 @@
1011
from pandas import DataFrame
1112
import pandas as pd
1213
from scipy import sparse
14+
from sklearn import __version__ as sklearn_version
15+
from sklearn.cross_validation import cross_val_score as sklearn_cv_score
1316
from sklearn.datasets import load_iris
1417
from sklearn.pipeline import Pipeline
1518
from sklearn.svm import SVC
@@ -277,3 +280,25 @@ def test_with_car_dataframe(cars_dataframe):
277280
labels = cars_dataframe["model"]
278281
scores = cross_val_score(pipeline, data, labels)
279282
assert scores.mean() > 0.30
283+
284+
285+
# NOTE: the built-in marker is spelled ``skipif`` (all lowercase);
# ``pytest.mark.skipIf`` only creates an arbitrary custom mark and never
# skips anything. A boolean condition also requires an explicit ``reason``.
@pytest.mark.skipif(parse_version(sklearn_version) < parse_version('0.16'),
                    reason="direct DataFrame cross-validation requires "
                           "scikit-learn>=0.16")
def test_direct_cross_validation(iris_dataframe):
    """
    Starting with sklearn>=0.16.0 we no longer need CV wrappers for dataframes.
    See https://github.com/paulgb/sklearn-pandas/issues/11
    """
    pipeline = Pipeline([
        ("preprocess", DataFrameMapper([
            ("petal length (cm)", None),
            ("petal width (cm)", None),
            ("sepal length (cm)", None),
            ("sepal width (cm)", None),
        ])),
        ("classify", SVC(kernel='linear'))
    ])
    data = iris_dataframe.drop("species", axis=1)
    labels = iris_dataframe["species"]
    # Call scikit-learn's own cross_val_score directly, without the
    # sklearn-pandas wrapper, to confirm DataFrames are accepted as-is.
    scores = sklearn_cv_score(pipeline, data, labels)
    assert scores.mean() > 0.96
    assert (scores.std() * 2) < 0.04

tox.ini

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,17 @@ envlist = py27, py34
33

44
[testenv]
55
deps =
6-
pip==7.0.1
7-
pytest==2.7.1
8-
setuptools==16.0
9-
wheel==0.24.0
10-
flake8==2.4.1
11-
py27: mock==1.3.0
6+
pip==7.0.1
7+
pytest==2.7.1
8+
setuptools==16.0
9+
wheel==0.24.0
10+
flake8==2.4.1
11+
numpy==1.9.2
12+
scipy==0.16.0
13+
pandas==0.16.2
14+
scikit-learn==0.16.1
15+
py27: mock==1.3.0
1216

1317
commands =
14-
pip install numpy --no-index
15-
pip install scipy --no-index
16-
pip install pandas --no-index
17-
pip install scikit-learn --no-index
18-
python setup.py develop
19-
flake8 tests
20-
py.test
18+
flake8 tests
19+
py.test

0 commit comments

Comments
 (0)