From 1bc2111fca5fbe968f3f9155adacfa3ddb202763 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:24:14 +0300 Subject: [PATCH 01/21] Initial implementation of Bernstein polynomials. --- sklearn_extra/preprocessing/__init__.py | 9 ++ .../preprocessing/_polynomial_basis.py | 65 ++++++++++ .../tests/test_polynomial_basis.py | 112 ++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 sklearn_extra/preprocessing/__init__.py create mode 100644 sklearn_extra/preprocessing/_polynomial_basis.py create mode 100644 sklearn_extra/preprocessing/tests/test_polynomial_basis.py diff --git a/sklearn_extra/preprocessing/__init__.py b/sklearn_extra/preprocessing/__init__.py new file mode 100644 index 00000000..ecc96f00 --- /dev/null +++ b/sklearn_extra/preprocessing/__init__.py @@ -0,0 +1,9 @@ +from ._polynomial_basis import ( + PolynomialBasisTransformer, + BernsteinFeatures, +) + +__all__ = [ + "PolynomialBasisTransformer", + "BernsteinFeatures", +] \ No newline at end of file diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py new file mode 100644 index 00000000..14b2cb06 --- /dev/null +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -0,0 +1,65 @@ +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_array, check_is_fitted, check_scalar +from itertools import combinations +from scipy.stats import binom + + +class PolynomialBasisTransformer(BaseEstimator, TransformerMixin): + def __init__(self, degree=5, bias=False, na_value=0., interactions=False): + self.degree = degree + self.bias = bias + self.na_value = na_value + self.interactions = interactions + + def fit(self, X, y=None): + self.degree = check_scalar(self.degree, 'degree', int, min_val=0) + self.bias = check_scalar(self.bias, 'bias', bool) + self.na_value = check_scalar(self.na_value, 'na_value', float) + self.interactions = check_scalar(self.interactions, 
'interactions', bool) + self.is_fitted_ = True + return self + + def transform(self, X, y=None): + check_is_fitted(self) + + X = check_array(X, estimator=self, input_name='X') + + # Get the number of columns in the input array + n_rows, n_features = X.shape + + # Compute the specific polynomial basis for each column + basis_features = [ + self.feature_matrix(X[:, i]) + for i in range(n_features) + ] + + # create interaction features - basis tensor products + if self.interactions: + interaction_features = [ + (u[:, None, :] * v[:, :, None]).reshape(n_rows, -1) + for u, v in combinations(basis_features, 2) + ] + result_basis = interaction_features + else: + result_basis = basis_features + + # remove the first basis function, if no bias is required + if not self.bias: + result_basis = [basis[:, 1:] for basis in result_basis] + + return np.hstack(result_basis) + + def feature_matrix(self, column): + vander = self.vandermonde_matrix(column) + return np.nan_to_num(vander, self.na_value) + + def vandermonde_matrix(self, column): + raise NotImplementedError("Subclasses must implement this method.") + + +class BernsteinFeatures(PolynomialBasisTransformer): + def vandermonde_matrix(self, column): + basis_idx = np.arange(1 + self.degree) + basis = binom.pmf(basis_idx, self.degree, column[:, None]) + return basis diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py new file mode 100644 index 00000000..6f5a7db5 --- /dev/null +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -0,0 +1,112 @@ +import numpy as np +import pytest +from sklearn.utils.estimator_checks import parametrize_with_checks + +from sklearn_extra.preprocessing import BernsteinFeatures + +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_almost_equal, +) + +feature_1d = np.array([0, 0.5, 1, np.nan]) +feature_2d = np.array([ + [0, 0.25], + [0.5, 0.5], + [np.nan, 0.75] +]) + + 
+@parametrize_with_checks([BernsteinFeatures()]) +def test_sklearn_compatibility(estimator, check): + check(estimator) + +def test_correct_param_types(): + with pytest.raises(ValueError): + BernsteinFeatures(degree="a").fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(bias="a").fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(na_value="a").fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(interactions="a").fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(bias=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(na_value="a").fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(interactions=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=-1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(bias=-1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(na_value=-1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(interactions=-1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1, na_value=1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1, na_value=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1.5).fit(feature_1d) + + with pytest.raises(ValueError): + BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1, unknown=1).fit(feature_1d) + + +def test_correct_output_one_feature(): + bbt = 
BernsteinFeatures(degree=2).fit(np.empty(0)) + output = bbt.transform(feature_1d) + expected_output = np.array([ + [0.0, 0.0], + [0.5, 0.25], + [1.0, 1.0], + [0.0, 0.0] + ]) + assert_array_almost_equal(output, expected_output) + + +def test_correct_output_two_features(): + bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) + output = bbt.transform(feature_2d) + expected_output = np.array([ + [0.0, 0.0, 0.25, 0.0625], + [0.5, 0.25, 0.5, 0.25], + [0.0, 0.0, 0.75, 0.5625]]) + assert_array_almost_equal(output, expected_output) + + +def test_correct_output_interactions(): + bbt = BernsteinFeatures(degree=2, interactions=True).fit(np.empty(0)) + output = bbt.transform(feature_2d) + expected_output = np.array([ + [0.0, 0.0, 0.25, 0.0, 0.0, 0.0625, 0.0, 0.0], + [0.5, 0.25, 0.5, 0.25, 0.125, 0.25, 0.125, 0.0625], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) + assert_array_almost_equal(output, expected_output) From 0e6488429c0655c2c696d341562aee12a570821c Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:30:17 +0300 Subject: [PATCH 02/21] Fixed a bug which disallowed passing NaN to bernstein transformers. 
--- sklearn_extra/preprocessing/_polynomial_basis.py | 2 +- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 14b2cb06..46458096 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -23,7 +23,7 @@ def fit(self, X, y=None): def transform(self, X, y=None): check_is_fitted(self) - X = check_array(X, estimator=self, input_name='X') + X = check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') # Get the number of columns in the input array n_rows, n_features = X.shape diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 6f5a7db5..d3000a48 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -6,7 +6,6 @@ from sklearn.utils._testing import ( assert_array_almost_equal, - assert_almost_equal, ) feature_1d = np.array([0, 0.5, 1, np.nan]) From aea1a79c7f36f9a64f7b75ca3bfafe650acf6daf Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:36:29 +0300 Subject: [PATCH 03/21] Added documentation for the polynomial basis. 
--- .../preprocessing/_polynomial_basis.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 46458096..bf72fc0a 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -1,3 +1,8 @@ +"""BernsteinFeatures transformer for generating polynomial features using the Bernstein basis.""" + +# Author: Alex Shtoff +# License: BSD 3 clause + import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils.validation import check_array, check_is_fitted, check_scalar @@ -6,6 +11,36 @@ class PolynomialBasisTransformer(BaseEstimator, TransformerMixin): + """ + Polynomial basis transformer for generating polynomial features. + + This transformer generates polynomial features of a given degree + for each data column separately, or tensor-product features for every + pair of columns if interactions=True. + + Parameters + ---------- + + degree : int, default=5 + The degree of the polynomial basis. + + bias : bool, default=False + If False, avoids generating the first basis function, assuming it + represents the constant term. + + na_value : float, default=0. + The value to replace NaNs in the input data with. + + interactions : bool, default=False + If True, generates tensor-product features for every pair of columns. If + bias=False, the product of the first two basis functions is excluded. + + + References + ---------- + [1] https://en.wikipedia.org/wiki/Bernstein_polynomial + [2] https://alexshtf.github.io/2024/02/11/Bernstein-Sklearn.html + """ def __init__(self, degree=5, bias=False, na_value=0., interactions=False): self.degree = degree self.bias = bias From b5141b9f18c004d29500bede54ea29b8782bdf24 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:38:53 +0300 Subject: [PATCH 04/21] Fixed documentation. 
--- .../preprocessing/_polynomial_basis.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index bf72fc0a..19ab8029 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -36,10 +36,15 @@ class PolynomialBasisTransformer(BaseEstimator, TransformerMixin): bias=False, the product of the first two basis functions is excluded. + Notes + ----- + Inheriting classes should override the `vandermonde_matrix` method to + generate the Vandermonde matrix of the concrete polynomial basis. + + References ---------- - [1] https://en.wikipedia.org/wiki/Bernstein_polynomial - [2] https://alexshtf.github.io/2024/02/11/Bernstein-Sklearn.html + [1] https://en.wikipedia.org/wiki/Vandermonde_matrix """ def __init__(self, degree=5, bias=False, na_value=0., interactions=False): self.degree = degree @@ -94,6 +99,18 @@ def vandermonde_matrix(self, column): class BernsteinFeatures(PolynomialBasisTransformer): + """ + Polynomial basis transformer for generating polynomial features using the Bernstein basis. 
+ + See Also + -------- + PolynomialBasisTransformer + + References + ---------- + [1] https://en.wikipedia.org/wiki/Bernstein_polynomial + [2]: https://alexshtf.github.io/2024/02/11/Bernstein-Sklearn.html + """ def vandermonde_matrix(self, column): basis_idx = np.arange(1 + self.degree) basis = binom.pmf(basis_idx, self.degree, column[:, None]) From 8eb965fad7caa390a79ef52689715b8f5c9c0d2b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 May 2024 20:40:51 +0000 Subject: [PATCH 05/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sklearn_extra/preprocessing/__init__.py | 2 +- .../preprocessing/_polynomial_basis.py | 21 +++++--- .../tests/test_polynomial_basis.py | 50 +++++++++++-------- 3 files changed, 42 insertions(+), 31 deletions(-) diff --git a/sklearn_extra/preprocessing/__init__.py b/sklearn_extra/preprocessing/__init__.py index ecc96f00..e4226995 100644 --- a/sklearn_extra/preprocessing/__init__.py +++ b/sklearn_extra/preprocessing/__init__.py @@ -6,4 +6,4 @@ __all__ = [ "PolynomialBasisTransformer", "BernsteinFeatures", -] \ No newline at end of file +] diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 19ab8029..ce8f1c33 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -46,32 +46,36 @@ class PolynomialBasisTransformer(BaseEstimator, TransformerMixin): ---------- [1] https://en.wikipedia.org/wiki/Vandermonde_matrix """ - def __init__(self, degree=5, bias=False, na_value=0., interactions=False): + + def __init__(self, degree=5, bias=False, na_value=0.0, interactions=False): self.degree = degree self.bias = bias self.na_value = na_value self.interactions = interactions def fit(self, X, y=None): - self.degree = check_scalar(self.degree, 'degree', int, min_val=0) - self.bias = 
check_scalar(self.bias, 'bias', bool) - self.na_value = check_scalar(self.na_value, 'na_value', float) - self.interactions = check_scalar(self.interactions, 'interactions', bool) + self.degree = check_scalar(self.degree, "degree", int, min_val=0) + self.bias = check_scalar(self.bias, "bias", bool) + self.na_value = check_scalar(self.na_value, "na_value", float) + self.interactions = check_scalar( + self.interactions, "interactions", bool + ) self.is_fitted_ = True return self def transform(self, X, y=None): check_is_fitted(self) - X = check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') + X = check_array( + X, estimator=self, input_name="X", force_all_finite="allow-nan" + ) # Get the number of columns in the input array n_rows, n_features = X.shape # Compute the specific polynomial basis for each column basis_features = [ - self.feature_matrix(X[:, i]) - for i in range(n_features) + self.feature_matrix(X[:, i]) for i in range(n_features) ] # create interaction features - basis tensor products @@ -111,6 +115,7 @@ class BernsteinFeatures(PolynomialBasisTransformer): [1] https://en.wikipedia.org/wiki/Bernstein_polynomial [2]: https://alexshtf.github.io/2024/02/11/Bernstein-Sklearn.html """ + def vandermonde_matrix(self, column): basis_idx = np.arange(1 + self.degree) basis = binom.pmf(basis_idx, self.degree, column[:, None]) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index d3000a48..84d1ca5b 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -9,17 +9,14 @@ ) feature_1d = np.array([0, 0.5, 1, np.nan]) -feature_2d = np.array([ - [0, 0.25], - [0.5, 0.5], - [np.nan, 0.75] -]) +feature_2d = np.array([[0, 0.25], [0.5, 0.5], [np.nan, 0.75]]) @parametrize_with_checks([BernsteinFeatures()]) def test_sklearn_compatibility(estimator, check): check(estimator) + def 
test_correct_param_types(): with pytest.raises(ValueError): BernsteinFeatures(degree="a").fit(feature_1d) @@ -70,42 +67,51 @@ def test_correct_param_types(): BernsteinFeatures(degree=1, bias=1, na_value=1.5).fit(feature_1d) with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1).fit(feature_1d) + BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1).fit( + feature_1d + ) with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1.5).fit(feature_1d) + BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1.5).fit( + feature_1d + ) with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1, unknown=1).fit(feature_1d) + BernsteinFeatures( + degree=1, bias=1, na_value=1, interactions=1, unknown=1 + ).fit(feature_1d) def test_correct_output_one_feature(): bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) output = bbt.transform(feature_1d) - expected_output = np.array([ - [0.0, 0.0], - [0.5, 0.25], - [1.0, 1.0], - [0.0, 0.0] - ]) + expected_output = np.array( + [[0.0, 0.0], [0.5, 0.25], [1.0, 1.0], [0.0, 0.0]] + ) assert_array_almost_equal(output, expected_output) def test_correct_output_two_features(): bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) output = bbt.transform(feature_2d) - expected_output = np.array([ - [0.0, 0.0, 0.25, 0.0625], - [0.5, 0.25, 0.5, 0.25], - [0.0, 0.0, 0.75, 0.5625]]) + expected_output = np.array( + [ + [0.0, 0.0, 0.25, 0.0625], + [0.5, 0.25, 0.5, 0.25], + [0.0, 0.0, 0.75, 0.5625], + ] + ) assert_array_almost_equal(output, expected_output) def test_correct_output_interactions(): bbt = BernsteinFeatures(degree=2, interactions=True).fit(np.empty(0)) output = bbt.transform(feature_2d) - expected_output = np.array([ - [0.0, 0.0, 0.25, 0.0, 0.0, 0.0625, 0.0, 0.0], - [0.5, 0.25, 0.5, 0.25, 0.125, 0.25, 0.125, 0.0625], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) + expected_output = np.array( + [ + [0.0, 0.0, 
0.25, 0.0, 0.0, 0.0625, 0.0, 0.0], + [0.5, 0.25, 0.5, 0.25, 0.125, 0.25, 0.125, 0.0625], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ] + ) assert_array_almost_equal(output, expected_output) From 45f6e309611b56c3f4540825efe2776d2637171a Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:52:07 +0300 Subject: [PATCH 06/21] Add input array checks. --- sklearn_extra/preprocessing/_polynomial_basis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 19ab8029..7dd19e88 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -57,6 +57,7 @@ def fit(self, X, y=None): self.bias = check_scalar(self.bias, 'bias', bool) self.na_value = check_scalar(self.na_value, 'na_value', float) self.interactions = check_scalar(self.interactions, 'interactions', bool) + check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') return self self.is_fitted_ = True return self From 44bcfbfbe883c5737abf6a910c92011116471e7d Mon Sep 17 00:00:00 2001 From: ashtoff Date: Sun, 19 May 2024 23:53:28 +0300 Subject: [PATCH 07/21] Bugfix. 
--- sklearn_extra/preprocessing/_polynomial_basis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index c0fde6c9..ebb1199d 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -60,7 +60,7 @@ def fit(self, X, y=None): self.interactions = check_scalar( self.interactions, "interactions", bool ) - check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') return self + check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') self.is_fitted_ = True return self From 6b155692da814ce02a6f0a36ae27928395f288d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 May 2024 20:53:38 +0000 Subject: [PATCH 08/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sklearn_extra/preprocessing/_polynomial_basis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index ebb1199d..7a076dbb 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -60,7 +60,9 @@ def fit(self, X, y=None): self.interactions = check_scalar( self.interactions, "interactions", bool ) - check_array(X, estimator=self, input_name='X', force_all_finite='allow-nan') + check_array( + X, estimator=self, input_name="X", force_all_finite="allow-nan" + ) self.is_fitted_ = True return self From d5d7203f9efaeaade6ea5935651390ac0ee930d7 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:10:03 +0300 Subject: [PATCH 09/21] More bugfixes. 
--- sklearn_extra/preprocessing/_polynomial_basis.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 7a076dbb..9e6735a9 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -5,12 +5,12 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.validation import check_array, check_is_fitted, check_scalar +from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted, check_scalar from itertools import combinations from scipy.stats import binom -class PolynomialBasisTransformer(BaseEstimator, TransformerMixin): +class PolynomialBasisTransformer(TransformerMixin, BaseEstimator): """ Polynomial basis transformer for generating polynomial features. @@ -60,18 +60,14 @@ def fit(self, X, y=None): self.interactions = check_scalar( self.interactions, "interactions", bool ) - check_array( - X, estimator=self, input_name="X", force_all_finite="allow-nan" - ) + self._validate_data(X, force_all_finite="allow-nan") self.is_fitted_ = True return self - def transform(self, X, y=None): + def transform(self, X): check_is_fitted(self) - X = check_array( - X, estimator=self, input_name="X", force_all_finite="allow-nan" - ) + X = self._validate_data(X, order="F", dtype=FLOAT_DTYPES, reset=False, force_all_finite="allow-nan") # Get the number of columns in the input array n_rows, n_features = X.shape From cb77b0966d8e8dccaef404a1cbfebe7f1e845424 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 May 2024 21:10:16 +0000 Subject: [PATCH 10/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sklearn_extra/preprocessing/_polynomial_basis.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git 
a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 9e6735a9..4c3a40b7 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -5,7 +5,11 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.validation import FLOAT_DTYPES, check_is_fitted, check_scalar +from sklearn.utils.validation import ( + FLOAT_DTYPES, + check_is_fitted, + check_scalar, +) from itertools import combinations from scipy.stats import binom @@ -67,7 +71,13 @@ def fit(self, X, y=None): def transform(self, X): check_is_fitted(self) - X = self._validate_data(X, order="F", dtype=FLOAT_DTYPES, reset=False, force_all_finite="allow-nan") + X = self._validate_data( + X, + order="F", + dtype=FLOAT_DTYPES, + reset=False, + force_all_finite="allow-nan", + ) # Get the number of columns in the input array n_rows, n_features = X.shape From 280d0bec31ca111ba1a57889b278994845d1a6fa Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:14:53 +0300 Subject: [PATCH 11/21] No need to validate order of X. --- sklearn_extra/preprocessing/_polynomial_basis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index 4c3a40b7..dd30bb5f 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -73,7 +73,6 @@ def transform(self, X): X = self._validate_data( X, - order="F", dtype=FLOAT_DTYPES, reset=False, force_all_finite="allow-nan", From d6d720e9ededad9e71f2f3426decc4f7b0e3ffac Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:24:44 +0300 Subject: [PATCH 12/21] Added estimator tags. 
--- sklearn_extra/preprocessing/_polynomial_basis.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index dd30bb5f..e606b976 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -109,6 +109,13 @@ def feature_matrix(self, column): def vandermonde_matrix(self, column): raise NotImplementedError("Subclasses must implement this method.") + def _get_tags(self): + base_tags = super()._get_tags() + return base_tags | { + "allow_nan": True, + "requires_y": False, + "stateless": True + } class BernsteinFeatures(PolynomialBasisTransformer): """ From 23bbd53d6922460aebe4020d927d7a54de6a48d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 May 2024 21:25:05 +0000 Subject: [PATCH 13/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sklearn_extra/preprocessing/_polynomial_basis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index e606b976..ac94cd91 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -114,9 +114,10 @@ def _get_tags(self): return base_tags | { "allow_nan": True, "requires_y": False, - "stateless": True + "stateless": True, } + class BernsteinFeatures(PolynomialBasisTransformer): """ Polynomial basis transformer for generating polynomial features using the Bernstein basis. From 457d76df732e73d627623f74c1e65c7cbad5dd03 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:25:42 +0300 Subject: [PATCH 14/21] 1D feature is now a column, rather than a row. 
--- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 84d1ca5b..ccd9fcef 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -8,7 +8,7 @@ assert_array_almost_equal, ) -feature_1d = np.array([0, 0.5, 1, np.nan]) +feature_1d = np.array([0, 0.5, 1, np.nan]).reshape(-1, 1) feature_2d = np.array([[0, 0.25], [0.5, 0.5], [np.nan, 0.75]]) From 457da1ff5184830b63bb6bc9d688cb7de6e33b4d Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:31:20 +0300 Subject: [PATCH 15/21] Removed the stateless flag. --- sklearn_extra/preprocessing/_polynomial_basis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn_extra/preprocessing/_polynomial_basis.py b/sklearn_extra/preprocessing/_polynomial_basis.py index ac94cd91..e818ddfa 100644 --- a/sklearn_extra/preprocessing/_polynomial_basis.py +++ b/sklearn_extra/preprocessing/_polynomial_basis.py @@ -114,7 +114,6 @@ def _get_tags(self): return base_tags | { "allow_nan": True, "requires_y": False, - "stateless": True, } From 5f02b3a07c4fcaa849d8158aa777d4a6f2876a11 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:34:37 +0300 Subject: [PATCH 16/21] Fixed test_correct_param_types() test. 
--- .../tests/test_polynomial_basis.py | 60 ++----------------- 1 file changed, 6 insertions(+), 54 deletions(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index ccd9fcef..02800801 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -18,69 +18,21 @@ def test_sklearn_compatibility(estimator, check): def test_correct_param_types(): - with pytest.raises(ValueError): - BernsteinFeatures(degree="a").fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(bias="a").fit(feature_1d) - - with pytest.raises(ValueError): + with pytest.raises(TypeError): BernsteinFeatures(na_value="a").fit(feature_1d) - with pytest.raises(ValueError): - BernsteinFeatures(interactions="a").fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1.5).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(bias=1.5).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(na_value="a").fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(interactions=1.5).fit(feature_1d) - with pytest.raises(ValueError): BernsteinFeatures(degree=-1).fit(feature_1d) - with pytest.raises(ValueError): - BernsteinFeatures(bias=-1).fit(feature_1d) + with pytest.raises(TypeError): + BernsteinFeatures(degree="a").fit(feature_1d) - with pytest.raises(ValueError): - BernsteinFeatures(na_value=-1).fit(feature_1d) + with pytest.raises(TypeError): + BernsteinFeatures(bias=-1).fit(feature_1d) - with pytest.raises(ValueError): + with pytest.raises(TypeError): BernsteinFeatures(interactions=-1).fit(feature_1d) - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1.5).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, 
na_value=1).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1.5).fit(feature_1d) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1).fit( - feature_1d - ) - - with pytest.raises(ValueError): - BernsteinFeatures(degree=1, bias=1, na_value=1, interactions=1.5).fit( - feature_1d - ) - - with pytest.raises(ValueError): - BernsteinFeatures( - degree=1, bias=1, na_value=1, interactions=1, unknown=1 - ).fit(feature_1d) - def test_correct_output_one_feature(): bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) From b8aa359ee6c1d5dd5925bf3c33f9281ce642a0c9 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:44:48 +0300 Subject: [PATCH 17/21] Fixed tests. --- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 02800801..6875faf8 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -44,7 +44,7 @@ def test_correct_output_one_feature(): def test_correct_output_two_features(): - bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) + bbt = BernsteinFeatures(degree=2).fit(feature_2d) output = bbt.transform(feature_2d) expected_output = np.array( [ @@ -57,7 +57,7 @@ def test_correct_output_two_features(): def test_correct_output_interactions(): - bbt = BernsteinFeatures(degree=2, interactions=True).fit(np.empty(0)) + bbt = BernsteinFeatures(degree=2, interactions=True).fit(feature_2d) output = bbt.transform(feature_2d) expected_output = np.array( [ From 6ffd8ec90857737bce584ce17617bd8b3cc28221 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:45:22 +0300 Subject: [PATCH 18/21] More test fixes. 
--- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 6875faf8..05d7e489 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -35,7 +35,7 @@ def test_correct_param_types(): def test_correct_output_one_feature(): - bbt = BernsteinFeatures(degree=2).fit(np.empty(0)) + bbt = BernsteinFeatures(degree=2).fit(feature_1d) output = bbt.transform(feature_1d) expected_output = np.array( [[0.0, 0.0], [0.5, 0.25], [1.0, 1.0], [0.0, 0.0]] From 2c14c298c3071d2154e6c6eb5de68ae196232d4f Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:59:30 +0300 Subject: [PATCH 19/21] fixed expected output in tests. --- .../preprocessing/tests/test_polynomial_basis.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 05d7e489..06274dbb 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -37,8 +37,12 @@ def test_correct_param_types(): def test_correct_output_one_feature(): bbt = BernsteinFeatures(degree=2).fit(feature_1d) output = bbt.transform(feature_1d) + print(output) expected_output = np.array( - [[0.0, 0.0], [0.5, 0.25], [1.0, 1.0], [0.0, 0.0]] + [[0.0, 0.0], + [0.5, 0.25], + [0.0, 1.0], + [0.0, 0.0]] ) assert_array_almost_equal(output, expected_output) @@ -48,9 +52,9 @@ def test_correct_output_two_features(): output = bbt.transform(feature_2d) expected_output = np.array( [ - [0.0, 0.0, 0.25, 0.0625], + [0.0, 0.0, 0.375, 0.0625], [0.5, 0.25, 0.5, 0.25], - [0.0, 0.0, 0.75, 0.5625], + [0.0, 0.0, 0.375, 0.5625], ] ) assert_array_almost_equal(output, 
expected_output) @@ -61,8 +65,8 @@ def test_correct_output_interactions(): output = bbt.transform(feature_2d) expected_output = np.array( [ - [0.0, 0.0, 0.25, 0.0, 0.0, 0.0625, 0.0, 0.0], - [0.5, 0.25, 0.5, 0.25, 0.125, 0.25, 0.125, 0.0625], + [0.0, 0.0, 0.375, 0.0, 0.0, 0.0625, 0.0, 0.0], + [0.125, 0.0625, 0.125, 0.25, 0.125, 0.0625, 0.125, 0.0625], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], ] ) From 27c310fd6699ff6b729343095b85ca277affbb37 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 May 2024 21:59:45 +0000 Subject: [PATCH 20/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 06274dbb..4d0ee433 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -39,10 +39,7 @@ def test_correct_output_one_feature(): output = bbt.transform(feature_1d) print(output) expected_output = np.array( - [[0.0, 0.0], - [0.5, 0.25], - [0.0, 1.0], - [0.0, 0.0]] + [[0.0, 0.0], [0.5, 0.25], [0.0, 1.0], [0.0, 0.0]] ) assert_array_almost_equal(output, expected_output) From 15df21ec87792227c416a517c981e1e47d758fa0 Mon Sep 17 00:00:00 2001 From: ashtoff Date: Mon, 20 May 2024 00:59:57 +0300 Subject: [PATCH 21/21] Remove redundant print statement. 
--- sklearn_extra/preprocessing/tests/test_polynomial_basis.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py index 06274dbb..e24d1b84 100644 --- a/sklearn_extra/preprocessing/tests/test_polynomial_basis.py +++ b/sklearn_extra/preprocessing/tests/test_polynomial_basis.py @@ -37,7 +37,6 @@ def test_correct_param_types(): def test_correct_output_one_feature(): bbt = BernsteinFeatures(degree=2).fit(feature_1d) output = bbt.transform(feature_1d) - print(output) expected_output = np.array( [[0.0, 0.0], [0.5, 0.25],