
Commit d7b4a05

Fix regression algorithms to give correct output dimensions (#1335)
* Added ignored_warnings file
* Use ignored_warnings file
* Test regressors with 1d, 1d as 2d and 2d targets
* Flake'd
* Fix broken relative imports to ignore_warnings
* Removed print and updated parameter type for tests
* Type import fix
1 parent c86eb95 commit d7b4a05

5 files changed: +24 −111 lines changed

autosklearn/pipeline/components/regression/gaussian_process.py

Lines changed: 3 additions & 0 deletions
@@ -37,6 +37,9 @@ def fit(self, X, y):
             normalize_y=True
         )
 
+        if y.ndim == 2 and y.shape[1] == 1:
+            y = y.flatten()
+
         self.estimator.fit(X, y)
 
         return self
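Note: the flattening above matters because scikit-learn's GaussianProcessRegressor mirrors the dimensionality of the targets it was fit on. A minimal sketch of the behaviour the fix addresses (the data here is purely illustrative):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

X = np.random.rand(20, 3)
y = np.random.rand(20, 1)   # 1d targets wrapped as a (n, 1) column vector

# Fitting on a column vector makes predictions come back as (n, 1)
gp = GaussianProcessRegressor(normalize_y=True).fit(X, y)
print(gp.predict(X).shape)  # (20, 1)

# Flattening first, as the patched fit() does, keeps predictions 1d
gp = GaussianProcessRegressor(normalize_y=True).fit(X, y.flatten())
print(gp.predict(X).shape)  # (20,)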

test/test_pipeline/components/regression/test_base.py

Lines changed: 5 additions & 5 deletions
@@ -1,4 +1,4 @@
-from typing import Type, Container
+from typing import Type
 
 import unittest
 
@@ -13,7 +13,7 @@
 
 from autosklearn.pipeline.components.regression import _regressors, RegressorChoice
 
-from test.test_pipeline.ignored_warnings import regressor_warnings, ignore_warnings
+from ...ignored_warnings import regressor_warnings, ignore_warnings
 
 
 class BaseRegressionComponentTest(unittest.TestCase):
@@ -331,7 +331,7 @@ def test_fit_and_predict_with_1d_targets_as_1d(
     regressor: Type[RegressorChoice],
     X: np.ndarray,
     y: np.ndarray
-) -> None:
+):
     """Test that all pipelines work with 1d target types
 
     Parameters
@@ -374,7 +374,7 @@ def test_fit_and_predict_with_1d_targets_as_2d(
     regressor: Type[RegressorChoice],
     X: np.ndarray,
     y: np.ndarray
-) -> None:
+):
     """Test that all pipelines work with 1d target types when they are wrapped as 2d
 
     Parameters
@@ -423,7 +423,7 @@ def test_fit_and_predict_with_2d_targets(
     regressor: Type[RegressorChoice],
     X: np.ndarray,
    y: np.ndarray
-) -> None:
+):
     """Test that all pipelines work with 2d target types
 
     Parameters
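For context, the three target layouts these tests exercise look roughly like this (shapes are illustrative; the actual fixtures and assertions live elsewhere in test_base.py and are not part of this diff):

import numpy as np

y_1d = np.array([1.0, 2.0, 3.0])    # 1d targets, shape (3,)
y_1d_as_2d = y_1d.reshape(-1, 1)    # the same targets wrapped as 2d, shape (3, 1)
y_2d = np.array([[1.0, 0.5],
                 [2.0, 1.5],
                 [3.0, 2.5]])       # genuine multi-output targets, shape (3, 2)

Per the commit title, predictions are expected to come back with the correct output dimensions for each case, presumably 1d for the first two layouts and 2d for the last, though the exact assertions are not shown in this diff.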

test/test_pipeline/ignored_warnings.py

Lines changed: 1 addition & 22 deletions
@@ -2,7 +2,6 @@
 from typing import List, Iterator, Tuple
 
 import warnings
-
 from sklearn.exceptions import ConvergenceWarning
 
 
@@ -69,34 +68,14 @@
             r" optimization hasn't converged yet\."
         )
     ),
-    (
-        ConvergenceWarning, ( # From FastICA
-            r"FastICA did not converge\."
-            r" Consider increasing tolerance or the maximum number of iterations\."
-        )
-    ),
     (
         UserWarning, ( # From LDA (Linear Discriminant Analysis)
             r"Variables are collinear"
         )
     ),
-    (
-        UserWarning, (
-            r"Clustering metrics expects discrete values but received continuous values"
-            r" for label, and multiclass values for target"
-        )
-    )
-]
-
-feature_preprocessing_warnings = [
-    (
-        ConvergenceWarning, ( # From liblinear
-            r"Liblinear failed to converge, increase the number of iterations."
-        )
-    )
 ]
 
-ignored_warnings = regressor_warnings + classifier_warnings + feature_preprocessing_warnings
+ignored_warnings = regressor_warnings + classifier_warnings
 
 
 @contextmanager
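The body of ignore_warnings itself is untouched by this commit, so it does not appear in the diff. Judging from the (warning category, message regex) pairs above and the with ignore_warnings(...) usage in the test files below, a minimal implementation consistent with that usage would look something like this (a sketch only; the parameter name and docstring are assumptions, not the repository's exact code):

import warnings
from contextlib import contextmanager
from typing import Iterator, List, Tuple


@contextmanager
def ignore_warnings(to_ignore: List[Tuple[type, str]]) -> Iterator[None]:
    """Suppress each (category, message-regex) pair for the duration of the block."""
    # 'to_ignore' is an assumed name for the list of (category, message) pairs
    with warnings.catch_warnings():
        for category, message in to_ignore:
            warnings.filterwarnings('ignore', category=category, message=message)
        yield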

test/test_pipeline/test_classification.py

Lines changed: 2 additions & 42 deletions
@@ -7,7 +7,6 @@
 import tempfile
 import unittest
 import unittest.mock
-import warnings
 
 from joblib import Memory
 import numpy as np
@@ -19,7 +18,6 @@
 import sklearn.ensemble
 import sklearn.svm
 from sklearn.utils.validation import check_is_fitted
-from sklearn.exceptions import ConvergenceWarning
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
@@ -34,42 +32,7 @@
 from autosklearn.pipeline.constants import \
     DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS, SIGNED_DATA, INPUT
 
-ignored_warnings = [
-    (
-        UserWarning, ( # From QuantileTransformer
-            r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\."
-            r" n_quantiles is set to n_samples\."
-        )
-    ),
-    (
-        UserWarning, ( # From FastICA
-            r"n_components is too large: it will be set to \d+"
-        )
-
-    ),
-    (
-        ConvergenceWarning, ( # From Liblinear
-            r"Liblinear failed to converge, increase the number of iterations\."
-        )
-    ),
-    (
-        ConvergenceWarning, ( # From SGD
-            r"Maximum number of iteration reached before convergence\. Consider increasing"
-            r" max_iter to improve the fit\."
-        )
-    ),
-    (
-        ConvergenceWarning, ( # From MLP
-            r"Stochastic Optimizer: Maximum iterations \(\d+\) reached and the"
-            r" optimization hasn't converged yet\."
-        )
-    ),
-    (
-        UserWarning, ( # From LDA (Linear Discriminant Analysis)
-            r"Variables are collinear"
-        )
-    ),
-]
+from .ignored_warnings import classifier_warnings, ignore_warnings
 
 
 class DummyClassifier(AutoSklearnClassificationAlgorithm):
@@ -535,10 +498,7 @@ def _test_configurations(
                             check_is_fitted(step)
 
             try:
-                with warnings.catch_warnings():
-                    for category, message in ignored_warnings:
-                        warnings.filterwarnings('ignore', category=category, message=message)
-
+                with ignore_warnings(classifier_warnings):
                     cls.fit(X_train, Y_train)
 
                     # After fit, all components should be tagged as fitted
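Taken together with the matching change in test_regression.py below, the effect of this refactor is that each test module stops maintaining its own filter loop and delegates to the shared helper. Both forms shown here are taken from this diff; only the source of the (category, message) pairs changes:

# Before: each test module kept a local ignored_warnings list and filtered inline
with warnings.catch_warnings():
    for category, message in ignored_warnings:
        warnings.filterwarnings('ignore', category=category, message=message)
    cls.fit(X_train, Y_train)

# After: the pairs live in test/test_pipeline/ignored_warnings.py and are applied
# by the shared context manager
with ignore_warnings(classifier_warnings):
    cls.fit(X_train, Y_train)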

test/test_pipeline/test_regression.py

Lines changed: 13 additions & 42 deletions
@@ -4,7 +4,6 @@
 import tempfile
 import unittest
 import unittest.mock
-import warnings
 
 from joblib import Memory
 import numpy as np
@@ -14,7 +13,6 @@
 import sklearn.ensemble
 import sklearn.svm
 from sklearn.utils.validation import check_is_fitted
-from sklearn.exceptions import ConvergenceWarning
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 from ConfigSpace.hyperparameters import CategoricalHyperparameter
@@ -28,32 +26,7 @@
 from autosklearn.pipeline.util import get_dataset
 from autosklearn.pipeline.constants import SPARSE, DENSE, SIGNED_DATA, UNSIGNED_DATA, PREDICTIONS
 
-ignored_warnings = [
-    (
-        UserWarning, ( # From QuantileTransformer
-            r"n_quantiles \(\d+\) is greater than the total number of samples \(\d+\)\."
-            r" n_quantiles is set to n_samples\."
-        )
-    ),
-    (
-        ConvergenceWarning, ( # From GaussianProcesses
-            r"The optimal value found for dimension \d+ of parameter \w+ is close"
-            r" to the specified (upper|lower) bound .*(Increasing|Decreasing) the bound"
-            r" and calling fit again may find a better value."
-        )
-    ),
-    (
-        UserWarning, ( # From FastICA
-            r"n_components is too large: it will be set to \d+"
-        )
-    ),
-    (
-        ConvergenceWarning, ( # From SGD
-            r"Maximum number of iteration reached before convergence\. Consider increasing"
-            r" max_iter to improve the fit\."
-        )
-    ),
-]
+from .ignored_warnings import regressor_warnings, ignore_warnings
 
 
 class SimpleRegressionPipelineTest(unittest.TestCase):
@@ -207,21 +180,19 @@ def _test_configurations(self, configurations_space, make_sparse=False,
                             check_is_fitted(step)
 
             try:
-                with warnings.catch_warnings():
-                    for category, message in ignored_warnings:
-                        warnings.filterwarnings('ignore', category=category, message=message)
-
+                with ignore_warnings(regressor_warnings):
                     cls.fit(X_train, Y_train)
-                # After fit, all components should be tagged as fitted
-                # by sklearn. Check is fitted raises an exception if that
-                # is not the case
-                try:
-                    for name, step in cls.named_steps.items():
-                        check_is_fitted(step)
-                except sklearn.exceptions.NotFittedError:
-                    self.fail("config={} raised NotFittedError unexpectedly!".format(
-                        config
-                    ))
+
+                # After fit, all components should be tagged as fitted
+                # by sklearn. Check is fitted raises an exception if that
+                # is not the case
+                try:
+                    for name, step in cls.named_steps.items():
+                        check_is_fitted(step)
+                except sklearn.exceptions.NotFittedError:
+                    self.fail("config={} raised NotFittedError unexpectedly!".format(
+                        config
+                    ))
 
                 cls.predict(X_test)
             except MemoryError:
