Skip to content

Multi reg #802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from autosklearn.smbo import AutoMLSMBO
from autosklearn.util.hash import hash_array_or_matrix
from autosklearn.metrics import f1_macro, accuracy, r2
from autosklearn.constants import MULTIOUTPUT_REGRESSION
from autosklearn.constants import *


Expand Down Expand Up @@ -1059,7 +1060,8 @@ def predict_proba(self, X, batch_size=None, n_jobs=1):
class AutoMLRegressor(BaseAutoML):
def __init__(self, *args, **kwargs):
    """Initialise the regressor and its target-type -> task lookup table.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super().__init__(*args, **kwargs)

    # Keys are the labels returned by ``type_of_target``; ``fit`` looks the
    # detected label up here and raises ``ValueError`` for anything missing.
    # Single-output targets must therefore be listed as well, otherwise
    # ordinary regression is rejected. Integer-valued regression targets are
    # labelled 'binary' / 'multiclass' (or 'multiclass-multioutput' for
    # several columns) by ``type_of_target`` but are still regression
    # problems for this class.
    self._task_mapping = {
        'continuous': REGRESSION,
        'binary': REGRESSION,
        'multiclass': REGRESSION,
        'continuous-multioutput': MULTIOUTPUT_REGRESSION,
        'multiclass-multioutput': MULTIOUTPUT_REGRESSION,
    }

def fit(
self,
X: np.ndarray,
Expand All @@ -1073,27 +1075,62 @@ def fit(
load_models: bool = True,
):
X, y = super()._perform_input_checks(X, y)
_n_outputs = 1 if len(y.shape) == 1 else y.shape[1]
if _n_outputs > 1:
raise NotImplementedError(
'Multi-output regression is not implemented.')
if X_test is not None:
X_test, y_test = self._perform_input_checks(X_test, y_test)
if len(y.shape) != len(y_test.shape):
raise ValueError('Target value shapes do not match: %s vs %s'
% (y.shape, y_test.shape))

y_task = type_of_target(y)
task = self._task_mapping.get(y_task)

if task is None:
raise ValueError('Cannot work on data of type %s' % y_task)

if metric is None:
metric = r2
return super().fit(
X, y,
X_test=X_test,
y_test=y_test,
task=REGRESSION,
task=task,
metric=metric,
feat_type=feat_type,
dataset_name=dataset_name,
only_return_configuration_space=only_return_configuration_space,
load_models=load_models,
)

def fit_ensemble(self, y, task=None, metric=None, precision='32',
                 dataset_name=None, ensemble_nbest=None,
                 ensemble_size=None):
    """Encode the targets and delegate ensemble fitting to the parent class.

    ``y`` is passed through the parent's ``_check_y`` and then through
    ``_process_targets``. The per-output unique values and their counts are
    stored on the instance as ``_target`` / ``_n_targets`` only when those
    attributes are not already present. All remaining arguments are
    forwarded positionally to the parent's ``fit_ensemble``, whose return
    value is returned unchanged.
    """
    checked = super()._check_y(y)
    encoded, uniques, counts = self._process_targets(checked)

    # Never overwrite values that already exist on the instance.
    for attr, value in (('_target', uniques), ('_n_targets', counts)):
        if not hasattr(self, attr):
            setattr(self, attr, value)

    return super().fit_ensemble(encoded, task, metric, precision,
                                dataset_name, ensemble_nbest, ensemble_size)

def _process_targets(self, y):
    """Replace every target value by its index among the column's unique values.

    Parameters
    ----------
    y : array-like
        Targets; validated by the parent's ``_check_y`` first
        (NOTE(review): exact validation/reshaping done there is not visible
        here). A 1-d result is treated as a single output, otherwise each
        column is one output.

    Returns
    -------
    y : np.ndarray
        A copy of the checked targets in which each value is replaced by
        the ``np.unique(..., return_inverse=True)`` index for its column.
    _target : list of np.ndarray
        Sorted unique values, one array per output.
    _n_targets : np.ndarray of int
        Number of unique values per output.

    Side effects
    ------------
    Sets ``self._n_outputs`` to the number of output columns.
    """
    y = super()._check_y(y)
    self._n_outputs = 1 if len(y.shape) == 1 else y.shape[1]

    # Work on a copy so the column-wise assignment below never writes into
    # the array handed in by the caller.
    y = np.copy(y)

    _target = []
    _n_targets = []

    if self._n_outputs == 1:
        target_k, y = np.unique(y, return_inverse=True)
        _target.append(target_k)
        _n_targets.append(target_k.shape[0])
    else:
        for k in range(self._n_outputs):
            target_k, y[:, k] = np.unique(y[:, k], return_inverse=True)
            _target.append(target_k)
            _n_targets.append(target_k.shape[0])

    # ``np.int`` was only an alias for the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    _n_targets = np.array(_n_targets, dtype=int)

    return y, _target, _n_targets
9 changes: 6 additions & 3 deletions autosklearn/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
MULTICLASS_CLASSIFICATION = 2
MULTILABEL_CLASSIFICATION = 3
REGRESSION = 4
MULTIOUTPUT_REGRESSION = 5

REGRESSION_TASKS = [REGRESSION]
REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION]
CLASSIFICATION_TASKS = [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION,
MULTILABEL_CLASSIFICATION]

Expand All @@ -15,10 +16,12 @@
{BINARY_CLASSIFICATION: 'binary.classification',
MULTICLASS_CLASSIFICATION: 'multiclass.classification',
MULTILABEL_CLASSIFICATION: 'multilabel.classification',
REGRESSION: 'regression'}
REGRESSION: 'regression',
MULTIOUTPUT_REGRESSION: 'multioutput.regression'}

STRING_TO_TASK_TYPES = \
{'binary.classification': BINARY_CLASSIFICATION,
'multiclass.classification': MULTICLASS_CLASSIFICATION,
'multilabel.classification': MULTILABEL_CLASSIFICATION,
'regression': REGRESSION}
'regression': REGRESSION,
'multioutput.regression': MULTIOUTPUT_REGRESSION}
8 changes: 3 additions & 5 deletions autosklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,15 +746,15 @@ def fit(self, X, y,
X : array-like or sparse matrix of shape = [n_samples, n_features]
The training input samples.

y : array-like, shape = [n_samples]
y : array-like, shape = [n_samples] or [n_samples, n_targets]
The regression target.

X_test : array-like or sparse matrix of shape = [n_samples, n_features]
Test data input samples. Will be used to save test predictions for
all models. This allows to evaluate the performance of Auto-sklearn
over time.

y_test : array-like, shape = [n_samples]
y_test : array-like, shape = [n_samples] or [n_samples, n_targets]
The regression target. Will be used to calculate the test error
of all models. This allows to evaluate the performance of
Auto-sklearn over time.
Expand Down Expand Up @@ -782,9 +782,7 @@ def fit(self, X, y,
# type of data is compatible with auto-sklearn. Legal target
# types are: continuous, binary, multiclass.
target_type = type_of_target(y)
if target_type in ['multiclass-multioutput',
'multilabel-indicator',
'continuous-multioutput',
if target_type in ['multilabel-indicator',
'unknown',
]:
raise ValueError("regression with data of type %s is not"
Expand Down
31 changes: 20 additions & 11 deletions autosklearn/evaluation/abstract_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
REGRESSION_TASKS,
MULTILABEL_CLASSIFICATION,
MULTICLASS_CLASSIFICATION,
MULTIOUTPUT_REGRESSION
)
from autosklearn.pipeline.implementations.util import (
convert_multioutput_multiclass_to_multilabel
Expand Down Expand Up @@ -183,12 +184,21 @@ def _get_model(self):
random_state=self.seed,
init_params=self._init_params)
else:
dataset_properties = {
'task': self.task_type,
'sparse': self.datamanager.info['is_sparse'] == 1,
'multilabel': self.task_type == MULTILABEL_CLASSIFICATION,
'multiclass': self.task_type == MULTICLASS_CLASSIFICATION,
}
if self.task_type in REGRESSION_TASKS:
dataset_properties = {
'task': self.task_type,
'sparse': self.datamanager.info['is_sparse'] == 1,
'multioutput': self.task_type == MULTIOUTPUT_REGRESSION,
}
else:
dataset_properties = {
'task': self.task_type,
'sparse': self.datamanager.info['is_sparse'] == 1,
'multilabel': self.task_type == MULTILABEL_CLASSIFICATION,
'multiclass': self.task_type == MULTICLASS_CLASSIFICATION,
}


model = self.model_class(config=self.configuration,
dataset_properties=dataset_properties,
random_state=self.seed,
Expand Down Expand Up @@ -412,19 +422,18 @@ def send_warnings_to_log(message, category, filename, lineno,

if len(Y_pred.shape) == 1:
Y_pred = Y_pred.reshape((-1, 1))

Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train)
return Y_pred

def _ensure_prediction_array_sizes(self, prediction, Y_train):
num_classes = self.datamanager.info['label_num']

if self.task_type == MULTICLASS_CLASSIFICATION and \
prediction.shape[1] < num_classes:
multi_type = [MULTICLASS_CLASSIFICATION, MULTIOUTPUT_REGRESSION]
if self.task_type in multi_type and prediction.shape[1] < num_classes:
if Y_train is None:
raise ValueError('Y_train must not be None!')
classes = list(np.unique(Y_train))

mapping = dict()

for class_number in range(num_classes):
if class_number in classes:
index = classes.index(class_number)
Expand Down
2 changes: 1 addition & 1 deletion autosklearn/pipeline/components/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def predict(self, X):

Returns
-------
array, shape = (n_samples,)
array, shape = (n_samples, ) or shape = (n_samples, n_targets)
Returns the predicted values

Notes
Expand Down
6 changes: 6 additions & 0 deletions autosklearn/pipeline/components/regression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ def get_components(cls):

@classmethod
def get_available_components(cls, data_prop,
dataset_properties=None,
include=None,
exclude=None):
if dataset_properties is None:
dataset_properties = {}
available_comp = cls.get_components()
components_dict = OrderedDict()

Expand Down Expand Up @@ -59,6 +62,9 @@ def get_available_components(cls, data_prop,

if entry.get_properties()['handles_regression'] is False:
continue
if dataset_properties.get('multioutput') is True and available_comp[name]. \
get_properties()['handles_multioutput'] is False:
continue
components_dict[name] = entry

return components_dict
Expand Down
2 changes: 1 addition & 1 deletion autosklearn/pipeline/components/regression/adaboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def get_properties(dataset_properties=None):
'name': 'AdaBoost Regressor',
'handles_regression': True,
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': True,
'is_deterministic': True,
'input': (DENSE, SPARSE, UNSIGNED_DATA),
'output': (PREDICTIONS, )}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': False,
'prefers_data_normalized': True,
'is_deterministic': True,
'input': (DENSE, UNSIGNED_DATA),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': True,
'is_deterministic': False,
'input': (DENSE, SPARSE, UNSIGNED_DATA),
'output': (PREDICTIONS,)}
Expand Down
1 change: 1 addition & 0 deletions autosklearn/pipeline/components/regression/extra_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': True,
'is_deterministic': True,
'input': (DENSE, SPARSE, UNSIGNED_DATA),
'output': (PREDICTIONS,)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_properties(dataset_properties=None):
'handles_regression': True,
'handles_classification': False,
'handles_multiclass': False,
'handles_multioutput': True,
'handles_multilabel': False,
'is_deterministic': True,
'input': (DENSE, UNSIGNED_DATA),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_properties(dataset_properties=None):
'handles_regression': True,
'handles_classification': False,
'handles_multiclass': False,
'handles_multioutput': True,
'handles_multilabel': False,
'is_deterministic': True,
'input': (DENSE, SPARSE, UNSIGNED_DATA),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': False,
'is_deterministic': False,
'input': (SPARSE, DENSE, UNSIGNED_DATA),
'output': (PREDICTIONS,)}
Expand Down
1 change: 1 addition & 0 deletions autosklearn/pipeline/components/regression/libsvm_svr.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': False,
'prefers_data_normalized': True,
'is_deterministic': True,
'input': (SPARSE, DENSE, UNSIGNED_DATA),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def get_properties(dataset_properties=None):
'handles_regression': True,
'handles_classification': False,
'handles_multiclass': False,
'handles_multioutput': True,
'handles_multilabel': False,
'prefers_data_normalized': False,
'is_deterministic': True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': True,
'prefers_data_normalized': True,
'is_deterministic': True,
'input': (SPARSE, DENSE, UNSIGNED_DATA),
Expand Down
1 change: 1 addition & 0 deletions autosklearn/pipeline/components/regression/sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def get_properties(dataset_properties=None):
'handles_classification': False,
'handles_multiclass': False,
'handles_multilabel': False,
'handles_multioutput': False,
'is_deterministic': True,
'handles_sparse': True,
'input': (DENSE, SPARSE, UNSIGNED_DATA),
Expand Down
23 changes: 13 additions & 10 deletions autosklearn/pipeline/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
MULTICLASS_CLASSIFICATION = 2
MULTILABEL_CLASSIFICATION = 3
REGRESSION = 4
MULTIOUTPUT_REGRESSION = 5

REGRESSION_TASKS = [REGRESSION]
REGRESSION_TASKS = [REGRESSION, MULTIOUTPUT_REGRESSION]
CLASSIFICATION_TASKS = [BINARY_CLASSIFICATION, MULTICLASS_CLASSIFICATION,
MULTILABEL_CLASSIFICATION]

Expand All @@ -15,26 +16,28 @@
{BINARY_CLASSIFICATION: "binary.classification",
MULTICLASS_CLASSIFICATION: "multiclass.classification",
MULTILABEL_CLASSIFICATION: "multilabel.classification",
REGRESSION: "regression"}
REGRESSION: "regression",
MULTIOUTPUT_REGRESSION: "multioutput.regression"}

STRING_TO_TASK_TYPES = \
{"binary.classification": BINARY_CLASSIFICATION,
"multiclass.classification": MULTICLASS_CLASSIFICATION,
"multilabel.classification": MULTILABEL_CLASSIFICATION,
"regression": REGRESSION}
"regression": REGRESSION,
"multioutput.regression": MULTIOUTPUT_REGRESSION}

DENSE = 5
SPARSE = 6
PREDICTIONS = 7
INPUT = 8
DENSE = 6
SPARSE = 7
PREDICTIONS = 8
INPUT = 9

SIGNED_DATA = 9
UNSIGNED_DATA = 10
SIGNED_DATA = 10
UNSIGNED_DATA = 11

DATASET_PROPERTIES_TO_STRING = \
{DENSE: 'dense',
SPARSE: 'sparse',
PREDICTIONS: 'predictions',
INPUT: 'input',
SIGNED_DATA: 'signed data',
UNSIGNED_DATA: 'unsigned data'}
UNSIGNED_DATA: 'unsigned data'}
Loading