Skip to content

Commit 766fd3f

Browse files
committed
Fix random state not being used for sampling configurations (#1329)
* Added random state to classifiers * Added some doc strings * Removed random_state again * flake'd * Fix some test issues * Re-added seed to test * Updated test doc for unknown test * flake'd
1 parent 326c1a4 commit 766fd3f

File tree

5 files changed

+558
-229
lines changed

5 files changed

+558
-229
lines changed

autosklearn/util/pipeline.py

Lines changed: 91 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# -*- encoding: utf-8 -*-
2-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List, Optional, Union
33

44
from ConfigSpace.configuration_space import ConfigurationSpace
55

6-
from sklearn.pipeline import Pipeline
6+
import numpy as np
77

88
from autosklearn.constants import (
99
BINARY_CLASSIFICATION,
@@ -16,27 +16,69 @@
1616
from autosklearn.pipeline.regression import SimpleRegressionPipeline
1717

1818

19-
__all__ = [
20-
'get_configuration_space',
21-
'get_class',
22-
]
19+
__all__ = ['get_configuration_space']
2320

2421

25-
def get_configuration_space(info: Dict[str, Any],
26-
include: Optional[Dict[str, List[str]]] = None,
27-
exclude: Optional[Dict[str, List[str]]] = None,
28-
) -> ConfigurationSpace:
22+
def get_configuration_space(
23+
info: Dict[str, Any],
24+
include: Optional[Dict[str, List[str]]] = None,
25+
exclude: Optional[Dict[str, List[str]]] = None,
26+
random_state: Optional[Union[int, np.random.RandomState]] = None
27+
) -> ConfigurationSpace:
28+
"""Get the configuration space of a pipeline given some dataset info
2929
30+
Parameters
31+
----------
32+
info: Dict[str, Any]
33+
Information about the dataset
34+
35+
include: Optional[Dict[str, List[str]]] = None
36+
A dictionary of what components to include for each pipeline step
37+
38+
exclude: Optional[Dict[str, List[str]]] = None
39+
A dictionary of what components to exclude for each pipeline step
40+
41+
random_state: Optional[Union[int, np.random.RandomState]] = None
42+
The random state to use for seeding the ConfigSpace
43+
44+
Returns
45+
-------
46+
ConfigurationSpace
47+
The configuration space for the pipeline
48+
"""
3049
if info['task'] in REGRESSION_TASKS:
31-
return _get_regression_configuration_space(info, include, exclude)
50+
return _get_regression_configuration_space(info, include, exclude, random_state)
3251
else:
33-
return _get_classification_configuration_space(info, include, exclude)
52+
return _get_classification_configuration_space(info, include, exclude, random_state)
53+
3454

55+
def _get_regression_configuration_space(
56+
info: Dict[str, Any],
57+
include: Optional[Dict[str, List[str]]],
58+
exclude: Optional[Dict[str, List[str]]],
59+
random_state: Optional[Union[int, np.random.RandomState]] = None
60+
) -> ConfigurationSpace:
61+
"""Get the configuration space of a regression pipeline given some dataset info
3562
36-
def _get_regression_configuration_space(info: Dict[str, Any],
37-
include: Optional[Dict[str, List[str]]],
38-
exclude: Optional[Dict[str, List[str]]]
39-
) -> ConfigurationSpace:
63+
Parameters
64+
----------
65+
info: Dict[str, Any]
66+
Information about the dataset
67+
68+
include: Optional[Dict[str, List[str]]]
69+
A dictionary of what components to include for each pipeline step
70+
71+
exclude: Optional[Dict[str, List[str]]]
72+
A dictionary of what components to exclude for each pipeline step
73+
74+
random_state: Optional[Union[int, np.random.RandomState]] = None
75+
The random state to use for seeding the ConfigSpace
76+
77+
Returns
78+
-------
79+
ConfigurationSpace
80+
The configuration space for the regression pipeline
81+
"""
4082
task_type = info['task']
4183
sparse = False
4284
multioutput = False
@@ -54,15 +96,39 @@ def _get_regression_configuration_space(info: Dict[str, Any],
5496
configuration_space = SimpleRegressionPipeline(
5597
dataset_properties=dataset_properties,
5698
include=include,
57-
exclude=exclude
99+
exclude=exclude,
100+
random_state=random_state
58101
).get_hyperparameter_search_space()
59102
return configuration_space
60103

61104

62-
def _get_classification_configuration_space(info: Dict[str, Any],
63-
include: Optional[Dict[str, List[str]]],
64-
exclude: Optional[Dict[str, List[str]]]
65-
) -> ConfigurationSpace:
105+
def _get_classification_configuration_space(
106+
info: Dict[str, Any],
107+
include: Optional[Dict[str, List[str]]],
108+
exclude: Optional[Dict[str, List[str]]],
109+
random_state: Optional[Union[int, np.random.RandomState]] = None
110+
) -> ConfigurationSpace:
111+
"""Get the configuration space of a classification pipeline given some dataset info
112+
113+
Parameters
114+
----------
115+
info: Dict[str, Any]
116+
Information about the dataset
117+
118+
include: Optional[Dict[str, List[str]]]
119+
A dictionary of what components to include for each pipeline step
120+
121+
exclude: Optional[Dict[str, List[str]]]
122+
A dictionary of what components to exclude for each pipeline step
123+
124+
random_state: Optional[Union[int, np.random.RandomState]] = None
125+
The random state to use for seeding the ConfigSpace
126+
127+
Returns
128+
-------
129+
ConfigurationSpace
130+
The configuration space for the classification pipeline
131+
"""
66132
task_type = info['task']
67133

68134
multilabel = False
@@ -87,12 +153,7 @@ def _get_classification_configuration_space(info: Dict[str, Any],
87153

88154
return SimpleClassificationPipeline(
89155
dataset_properties=dataset_properties,
90-
include=include, exclude=exclude).\
91-
get_hyperparameter_search_space()
92-
93-
94-
def get_class(info: Dict[str, Any]) -> Pipeline:
95-
if info['task'] in REGRESSION_TASKS:
96-
return SimpleRegressionPipeline
97-
else:
98-
return SimpleClassificationPipeline
156+
include=include,
157+
exclude=exclude,
158+
random_state=random_state
159+
).get_hyperparameter_search_space()

test/test_metalearning/pyMetaLearn/test_meta_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ def setUp(self):
1717
data_dir = os.path.join(data_dir, 'test_meta_base_data')
1818
os.chdir(data_dir)
1919

20-
cs = autosklearn.pipeline.classification.SimpleClassificationPipeline()\
21-
.get_hyperparameter_search_space()
20+
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
21+
cs = pipeline.get_hyperparameter_search_space()
2222

2323
self.logger = logging.getLogger()
2424
self.base = MetaBase(cs, data_dir, logger=self.logger)

test/test_metalearning/pyMetaLearn/test_metalearner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def setUp(self):
2323
data_dir = os.path.join(data_dir, 'test_meta_base_data')
2424
os.chdir(data_dir)
2525

26-
self.cs = autosklearn.pipeline.classification\
27-
.SimpleClassificationPipeline().get_hyperparameter_search_space()
26+
pipeline = autosklearn.pipeline.classification.SimpleClassificationPipeline()
27+
self.cs = pipeline.get_hyperparameter_search_space()
2828

2929
self.logger = logging.getLogger()
3030
meta_base = MetaBase(self.cs, data_dir, logger=self.logger)

test/test_pipeline/components/data_preprocessing/test_balancing.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@ def test_weighting_effect(self):
108108
default = cs.get_default_configuration()
109109
default._values['balancing:strategy'] = strategy
110110

111-
classifier = SimpleClassificationPipeline(
112-
config=default, **model_args
113-
)
111+
classifier = SimpleClassificationPipeline(config=default, **model_args)
114112
classifier.fit(X_train, Y_train)
115113

116114
predictions1 = classifier.predict(X_test)
@@ -126,9 +124,7 @@ def test_weighting_effect(self):
126124
X_test = data_[0][100:]
127125
Y_test = data_[1][100:]
128126

129-
classifier = SimpleClassificationPipeline(
130-
config=default, **model_args
131-
)
127+
classifier = SimpleClassificationPipeline(config=default, **model_args)
132128
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
133129
classifier.fit_estimator(Xt, Y_train, **fit_params)
134130

@@ -157,8 +153,7 @@ def test_weighting_effect(self):
157153

158154
include = {'classifier': ['sgd'], 'feature_preprocessor': [name]}
159155

160-
classifier = SimpleClassificationPipeline(
161-
random_state=1, include=include)
156+
classifier = SimpleClassificationPipeline(random_state=1, include=include)
162157
cs = classifier.get_hyperparameter_search_space()
163158
default = cs.get_default_configuration()
164159
default._values['balancing:strategy'] = strategy
@@ -177,8 +172,7 @@ def test_weighting_effect(self):
177172
Y_test = data_[1][100:]
178173

179174
default._values['balancing:strategy'] = strategy
180-
classifier = SimpleClassificationPipeline(
181-
default, random_state=1, include=include)
175+
classifier = SimpleClassificationPipeline(default, random_state=1, include=include)
182176
Xt, fit_params = classifier.fit_transformer(X_train, Y_train)
183177
classifier.fit_estimator(Xt, Y_train, **fit_params)
184178
predictions = classifier.predict(X_test)

0 commit comments

Comments
 (0)