forked from scikit-learn-contrib/imbalanced-learn
-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathplot_multi_class_under_sampling.py
48 lines (35 loc) · 1.46 KB
/
plot_multi_class_under_sampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
=============================================
Multiclass classification with under-sampling
=============================================
Some balancing methods can balance datasets with multiple classes.
We provide an example to illustrate the use of those methods, which does
not differ from the binary case.
"""
# Authors: Guillaume Lemaitre <[email protected]>
# License: MIT
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from imblearn.datasets import make_imbalance
from imblearn.under_sampling import NearMiss
from imblearn.pipeline import make_pipeline
from imblearn.metrics import classification_report_imbalanced
print(__doc__)

# Single seed shared by the resampling, the train/test split and the
# classifier so that the example is reproducible.
RANDOM_STATE = 42

# Load the iris dataset and derive an imbalanced version of it: the
# sampling strategy requests 25 samples for class 0 and 50 samples for
# each of classes 1 and 2.
iris = load_iris()
X, y = make_imbalance(
    iris.data,
    iris.target,
    sampling_strategy={0: 25, 1: 50, 2: 50},
    random_state=RANDOM_STATE,
)

# Split into training and testing sets, then show the class counts of each.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=RANDOM_STATE,
)
print(f'Training target statistics: {Counter(y_train)}')
print(f'Testing target statistics: {Counter(y_test)}')

# Chain the NearMiss (version 2) under-sampler with a linear SVM classifier
# and fit the resulting pipeline on the training split.
pipeline = make_pipeline(
    NearMiss(version=2),
    LinearSVC(random_state=RANDOM_STATE),
)
pipeline.fit(X_train, y_train)

# Predict on the held-out split and print the imbalanced classification report.
print(
    classification_report_imbalanced(
        y_test,
        pipeline.predict(X_test),
    )
)