scikit-learn-contrib · glemaitre · May 8, 2018 · Mar 20, 2018 · Mar 20, 2018 · Mar 26, 2018
diff --git a/doc/api.rst b/doc/api.rst
@@ -205,4 +205,5 @@ Imbalance-learn provides some fast-prototyping tools.
    utils.estimator_checks.check_estimator
    utils.check_neighbors_object
    utils.check_ratio
+   utils.check_sampling_strategy
    utils.hash_X_y
diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
@@ -94,29 +94,31 @@ Imbalanced generator
 ====================
 
 :func:`make_imbalance` turns an original dataset into an imbalanced
-dataset. This behaviour is driven by the parameter ``ratio`` which behave
-similarly to other resampling algorithm. ``ratio`` can be given as a dictionary
-where the key corresponds to the class and the value is the the number of
-samples in the class::
+dataset. This behaviour is driven by the parameter ``sampling_strategy`` which
+behaves similarly to other resampling algorithms. ``sampling_strategy`` can be
+given as a dictionary where the key corresponds to the class and the value is
+the number of samples in the class::
 
   >>> from sklearn.datasets import load_iris
   >>> from imblearn.datasets import make_imbalance
   >>> iris = load_iris()
-  >>> ratio = {0: 20, 1: 30, 2: 40}
-  >>> X_imb, y_imb = make_imbalance(iris.data, iris.target, ratio=ratio)
+  >>> sampling_strategy = {0: 20, 1: 30, 2: 40}
+  >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
+  ...                               sampling_strategy=sampling_strategy)
   >>> sorted(Counter(y_imb).items())
   [(0, 20), (1, 30), (2, 40)]
 
 Note that all samples of a class are passed-through if the class is not mentioned
 in the dictionary::
 
-  >>> ratio = {0: 10}
-  >>> X_imb, y_imb = make_imbalance(iris.data, iris.target, ratio=ratio)
+  >>> sampling_strategy = {0: 10}
+  >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
+  ...                               sampling_strategy=sampling_strategy)
   >>> sorted(Counter(y_imb).items())
   [(0, 10), (1, 50), (2, 50)]
 
 Instead of a dictionary, a function can be defined and directly pass to
-``ratio``::
+``sampling_strategy``::
 
   >>> def ratio_multiplier(y):
   ...     multiplier = {0: 0.5, 1: 0.7, 2: 0.95}
@@ -125,9 +127,9 @@ Instead of a dictionary, a function can be defined and directly pass to
   ...         target_stats[key] = int(value * multiplier[key])
   ...     return target_stats
   >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
-  ...                               ratio=ratio_multiplier)
+  ...                               sampling_strategy=ratio_multiplier)
   >>> sorted(Counter(y_imb).items())
   [(0, 25), (1, 35), (2, 47)]
 
 See :ref:`sphx_glr_auto_examples_datasets_plot_make_imbalance.py` and
-:ref:`sphx_glr_auto_examples_plot_ratio_usage.py`.
+:ref:`sphx_glr_auto_examples_plot_sampling_strategy_usage.py`.
diff --git a/doc/developers_utils.rst b/doc/developers_utils.rst
@@ -26,9 +26,10 @@ which accepts arrays, matrices, or sparse matrices as arguments, the following
 should be used when applicable.
 
 - :func:`check_neighbors_object`: Check the objects is consistent to be a NN.
-- :func:`check_target_type`: Check the target types to be conform to the current samplers.
-- :func:`check_ratio`: Checks ratio for consistent type and return a dictionary
-    containing each targeted class with its corresponding number of pixel.
+- :func:`check_target_type`: Check the target types to be conform to the current samplers.
+- :func:`check_sampling_strategy`: Checks that the sampling target is
+  consistent with the type and returns a dictionary containing each targeted
+  class with its corresponding number of samples.
 
 
 Deprecation

diff --git a/doc/ensemble.rst b/doc/ensemble.rst
@@ -92,12 +92,13 @@ output of an :class:`EasyEnsemble` sampler with an ensemble of classifiers
 (i.e. ``BaggingClassifier``). Therefore, :class:`BalancedBaggingClassifier`
 takes the same parameters than the scikit-learn
 ``BaggingClassifier``. Additionally, there is two additional parameters,
-``ratio`` and ``replacement``, as in the :class:`EasyEnsemble` sampler::
+``sampling_strategy`` and ``replacement``, as in the :class:`EasyEnsemble`
+sampler::
 
 
   >>> from imblearn.ensemble import BalancedBaggingClassifier
   >>> bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
-  ...                                 ratio='auto',
+  ...                                 sampling_strategy='auto',
   ...                                 replacement=False,
   ...                                 random_state=0)
   >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS

diff --git a/doc/under_sampling.rst b/doc/under_sampling.rst
@@ -103,7 +103,7 @@ by considering independently each targeted class::
   >>> print(np.vstack({tuple(row) for row in X_resampled}).shape)
   (181, 2)
 
-See :ref:`sphx_glr_auto_examples_plot_ratio_usage.py`,
+See :ref:`sphx_glr_auto_examples_plot_sampling_strategy_usage.py`,
 :ref:`sphx_glr_auto_examples_under-sampling_plot_comparison_under_sampling.py`,
 and :ref:`sphx_glr_auto_examples_under-sampling_plot_random_under_sampler.py`.
 
@@ -214,11 +214,11 @@ the samples of interest in green.
    :scale: 60
    :align: center
 
-The parameter ``ratio`` control which sample of the link will be removed. For
-instance, the default (i.e., ``ratio='auto'``) will remove the sample from the
-majority class. Both samples from the majority and minority class can be
-removed by setting ``ratio`` to ``'all'``. The figure illustrates this
-behaviour.
+The parameter ``sampling_strategy`` controls which sample of the link will be
+removed. For instance, the default (i.e., ``sampling_strategy='auto'``) will
+remove the sample from the majority class. Both samples from the majority and
+minority class can be removed by setting ``sampling_strategy`` to ``'all'``. The
+figure illustrates this behaviour.
 
 .. image:: ./auto_examples/under-sampling/images/sphx_glr_plot_illustration_tomek_links_002.png
    :target: ./auto_examples/under-sampling/plot_illustration_tomek_links.html

diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
@@ -6,6 +6,18 @@ Version 0.4 (under development)
 Changelog
 ---------
 
+API
+...
+
+- Replace the parameter ``ratio`` by ``sampling_strategy``. :issue:`411` by
+  :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Allow the use of a ``float`` with binary classification for
+  ``sampling_strategy``. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Allow the use of a ``list`` for the cleaning methods to specify the class to
+  sample. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Enhancement
 ...........
 
@@ -34,3 +46,20 @@ Maintenance
 
 - Remove deprecated parameters in 0.2 - :issue:`331` by :user:`Guillaume
   Lemaitre <glemaitre>`.
+
+Deprecation
+...........
+
+- Deprecate ``ratio`` in favor of ``sampling_strategy``. :issue:`411` by
+  :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Deprecate the use of a ``dict`` for cleaning methods. A ``list`` should be
+  used. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Deprecate ``random_state`` in :class:`imblearn.under_sampling.NearMiss`,
+  :class:`imblearn.under_sampling.EditedNearestNeighbors`,
+  :class:`imblearn.under_sampling.RepeatedEditedNearestNeighbors`,
+  :class:`imblearn.under_sampling.AllKNN`,
+  :class:`imblearn.under_sampling.NeighbourhoodCleaningRule`,
+  :class:`imblearn.under_sampling.InstanceHardnessThreshold`,
+  :class:`imblearn.under_sampling.CondensedNearestNeighbours`.
diff --git a/examples/applications/plot_multi_class_under_sampling.py b/examples/applications/plot_multi_class_under_sampling.py
@@ -29,8 +29,9 @@
 
 # Create a folder to fetch the dataset
 iris = load_iris()
-X, y = make_imbalance(iris.data, iris.target, ratio={0: 25, 1: 50, 2: 50},
-                      random_state=0)
+X, y = make_imbalance(iris.data, iris.target,
+                      sampling_strategy={0: 25, 1: 50, 2: 50},
+                      random_state=RANDOM_STATE)
 
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, random_state=RANDOM_STATE)
@@ -39,7 +40,7 @@
 print('Testing target statistics: {}'.format(Counter(y_test)))
 
 # Create a pipeline
-pipeline = make_pipeline(NearMiss(version=2, random_state=RANDOM_STATE),
+pipeline = make_pipeline(NearMiss(version=2),
                          LinearSVC(random_state=RANDOM_STATE))
 pipeline.fit(X_train, y_train)
 

diff --git a/examples/datasets/plot_make_imbalance.py b/examples/datasets/plot_make_imbalance.py
@@ -55,12 +55,12 @@ def ratio_func(y, multiplier, minority_class):
 for i, multiplier in enumerate(multipliers, start=1):
     ax = axs[i]
 
-    X_, y_ = make_imbalance(X, y, ratio=ratio_func,
+    X_, y_ = make_imbalance(X, y, sampling_strategy=ratio_func,
                             **{"multiplier": multiplier,
                                "minority_class": 1})
     ax.scatter(X_[y_ == 0, 0], X_[y_ == 0, 1], label="Class #0", alpha=0.5)
     ax.scatter(X_[y_ == 1, 0], X_[y_ == 1, 1], label="Class #1", alpha=0.5)
-    ax.set_title('ratio = {}'.format(multiplier))
+    ax.set_title('sampling_strategy = {}'.format(multiplier))
     plot_decoration(ax)
 
 plt.tight_layout()

diff --git a/examples/plot_ratio_usage.py b/examples/plot_ratio_usage.py