Skip to content

Commit ec67c46

Browse files
authored
Rename and add member variables (#197)
* Initial commit
* axis -> _axis
1 parent d518ebf commit ec67c46

File tree

11 files changed

+181
-134
lines changed

11 files changed

+181
-134
lines changed

src/skmatter/_selection.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GreedySelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
6363
progress_bar: bool, default=False
6464
option to use `tqdm <https://tqdm.github.io/>`_
6565
progress bar to monitor selections. Stored in
66-
:py:attr:`self.report_progress`.
66+
:py:attr:`self.report_progress_`.
6767
6868
full : bool, default=False
6969
In the case that all non-redundant selections are exhausted, choose
@@ -98,7 +98,6 @@ def __init__(
9898
self.n_to_select = n_to_select
9999
self.score_threshold = score_threshold
100100
self.score_threshold_type = score_threshold_type
101-
self._first_score = None
102101
if self.score_threshold_type not in ["relative", "absolute"]:
103102
raise ValueError(
104103
"invalid score_threshold_type, expected one of 'relative' or 'absolute'"
@@ -141,9 +140,9 @@ def fit(self, X, y=None, warm_start=False):
141140
)
142141

143142
if self.progress_bar is True:
144-
self.report_progress = get_progress_bar()
143+
self.report_progress_ = get_progress_bar()
145144
elif self.progress_bar is False:
146-
self.report_progress = no_progress_bar
145+
self.report_progress_ = no_progress_bar
147146

148147
params = dict(
149148
accept_sparse="csc",
@@ -168,6 +167,8 @@ def fit(self, X, y=None, warm_start=False):
168167

169168
n_to_select_from = X.shape[self._axis]
170169

170+
self.n_samples_in_, self.n_features_in_ = X.shape
171+
171172
error_msg = (
172173
"n_to_select must be either None, an "
173174
f"integer in [1, n_{self.selection_type}s] "
@@ -203,7 +204,7 @@ def fit(self, X, y=None, warm_start=False):
203204

204205
n_iterations -= self.n_selected_
205206

206-
for n in self.report_progress(range(n_iterations)):
207+
for n in self.report_progress_(range(n_iterations)):
207208
new_idx = self._get_best_new_selection(self.score, X, y)
208209
if new_idx is not None:
209210
self._update_post_selection(X, y, new_idx)
@@ -258,9 +259,6 @@ def transform(self, X, y=None):
258259
ensure_2d=self._axis,
259260
)
260261

261-
if len(mask) != X.shape[self._axis]:
262-
raise ValueError("X has a different shape than during fitting.")
263-
264262
if self._axis == 1:
265263
return X[:, safe_mask(X, mask)]
266264
else:
@@ -326,6 +324,7 @@ def _init_greedy_search(self, X, y, n_to_select):
326324
"""Initializes the search. Prepares an array to store the selected features."""
327325

328326
self.n_selected_ = 0
327+
self.first_score_ = None
329328

330329
sel_shape = list(X.shape)
331330
sel_shape[self._axis] = n_to_select
@@ -368,15 +367,15 @@ def _get_best_new_selection(self, scorer, X, y):
368367

369368
max_score_idx = np.argmax(scores)
370369
if self.score_threshold is not None:
371-
if self._first_score is None:
372-
self._first_score = scores[max_score_idx]
370+
if self.first_score_ is None:
371+
self.first_score_ = scores[max_score_idx]
373372

374373
if self.score_threshold_type == "absolute":
375374
if scores[max_score_idx] < self.score_threshold:
376375
return None
377376

378377
if self.score_threshold_type == "relative":
379-
if scores[max_score_idx] / self._first_score < self.score_threshold:
378+
if scores[max_score_idx] / self.first_score_ < self.score_threshold:
380379
return None
381380

382381
return max_score_idx

src/skmatter/decomposition/_kernel_pcovr.py

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,6 @@ def __init__(
220220
self.kernel_params = kernel_params
221221

222222
self.n_jobs = n_jobs
223-
self.n_samples_ = None
224223

225224
self.fit_inverse_transform = fit_inverse_transform
226225

@@ -308,17 +307,19 @@ def fit(self, X, Y, W=None):
308307

309308
if self.n_components is None:
310309
if self.svd_solver != "arpack":
311-
self.n_components = X.shape[0]
310+
self.n_components_ = X.shape[0]
312311
else:
313-
self.n_components = X.shape[0] - 1
312+
self.n_components_ = X.shape[0] - 1
313+
else:
314+
self.n_components_ = self.n_components
314315

315316
K = self._get_kernel(X)
316317

317318
if self.center:
318319
self.centerer_ = KernelNormalizer()
319320
K = self.centerer_.fit_transform(K)
320321

321-
self.n_samples_ = X.shape[0]
322+
self.n_samples_in_, self.n_features_in_ = X.shape
322323

323324
if self.regressor != "precomputed":
324325
if self.regressor is None:
@@ -362,7 +363,7 @@ def fit(self, X, Y, W=None):
362363
# to avoid needing to compute the kernel a second time
363364
self.regressor_ = check_krr_fit(regressor, K, X, Y)
364365

365-
W = self.regressor_.dual_coef_.reshape(X.shape[0], -1)
366+
W = self.regressor_.dual_coef_.reshape(self.n_samples_in_, -1)
366367

367368
# Use this instead of `self.regressor_.predict(K)`
368369
# so that we can handle the case of the pre-fitted regressor
@@ -387,12 +388,17 @@ def fit(self, X, Y, W=None):
387388
# Handle svd_solver
388389
self._fit_svd_solver = self.svd_solver
389390
if self._fit_svd_solver == "auto":
390-
# Small problem or self.n_components == 'mle', just call full PCA
391-
if max(X.shape) <= 500 or self.n_components == "mle":
391+
# Small problem or self.n_components_ == 'mle', just call full PCA
392+
if (
393+
max(self.n_samples_in_, self.n_features_in_) <= 500
394+
or self.n_components_ == "mle"
395+
):
392396
self._fit_svd_solver = "full"
393-
elif self.n_components >= 1 and self.n_components < 0.8 * min(X.shape):
397+
elif self.n_components_ >= 1 and self.n_components_ < 0.8 * max(
398+
self.n_samples_in_, self.n_features_in_
399+
):
394400
self._fit_svd_solver = "randomized"
395-
# This is also the case of self.n_components in (0,1)
401+
# This is also the case of self.n_components_ in (0,1)
396402
else:
397403
self._fit_svd_solver = "full"
398404

@@ -536,31 +542,31 @@ def score(self, X, Y):
536542
return -sum([Lkpca, Lkrr])
537543

538544
def _decompose_truncated(self, mat):
539-
if not 1 <= self.n_components <= self.n_samples_:
545+
if not 1 <= self.n_components_ <= self.n_samples_in_:
540546
raise ValueError(
541547
"n_components=%r must be between 1 and "
542548
"n_samples=%r with "
543549
"svd_solver='%s'"
544550
% (
545-
self.n_components,
546-
self.n_samples_,
551+
self.n_components_,
552+
self.n_samples_in_,
547553
self.svd_solver,
548554
)
549555
)
550-
elif not isinstance(self.n_components, numbers.Integral):
556+
elif not isinstance(self.n_components_, numbers.Integral):
551557
raise ValueError(
552558
"n_components=%r must be of type int "
553559
"when greater than or equal to 1, was of type=%r"
554-
% (self.n_components, type(self.n_components))
560+
% (self.n_components_, type(self.n_components_))
555561
)
556-
elif self.svd_solver == "arpack" and self.n_components == self.n_samples_:
562+
elif self.svd_solver == "arpack" and self.n_components_ == self.n_samples_in_:
557563
raise ValueError(
558564
"n_components=%r must be strictly less than "
559565
"n_samples=%r with "
560566
"svd_solver='%s'"
561567
% (
562-
self.n_components,
563-
self.n_samples_,
568+
self.n_components_,
569+
self.n_samples_in_,
564570
self.svd_solver,
565571
)
566572
)
@@ -569,7 +575,7 @@ def _decompose_truncated(self, mat):
569575

570576
if self._fit_svd_solver == "arpack":
571577
v0 = _init_arpack_v0(min(mat.shape), random_state)
572-
U, S, Vt = svds(mat, k=self.n_components, tol=self.tol, v0=v0)
578+
U, S, Vt = svds(mat, k=self.n_components_, tol=self.tol, v0=v0)
573579
# svds doesn't abide by scipy.linalg.svd/randomized_svd
574580
# conventions, so reverse its outputs.
575581
S = S[::-1]
@@ -581,7 +587,7 @@ def _decompose_truncated(self, mat):
581587
# sign flipping is done inside
582588
U, S, Vt = randomized_svd(
583589
mat,
584-
n_components=self.n_components,
590+
n_components=self.n_components_,
585591
n_iter=self.iterated_power,
586592
flip_sign=True,
587593
random_state=random_state,
@@ -594,24 +600,25 @@ def _decompose_truncated(self, mat):
594600
return U, S, Vt
595601

596602
def _decompose_full(self, mat):
597-
if self.n_components != "mle":
598-
if not (0 <= self.n_components <= self.n_samples_):
603+
if self.n_components_ != "mle":
604+
if not (0 <= self.n_components_ <= self.n_samples_in_):
599605
raise ValueError(
600606
"n_components=%r must be between 1 and "
601607
"n_samples=%r with "
602608
"svd_solver='%s'"
603609
% (
604-
self.n_components,
605-
self.n_samples_,
610+
self.n_components_,
611+
self.n_samples_in_,
606612
self.svd_solver,
607613
)
608614
)
609-
elif self.n_components >= 1:
610-
if not isinstance(self.n_components, numbers.Integral):
615+
elif self.n_components_ >= 1:
616+
if not isinstance(self.n_components_, numbers.Integral):
611617
raise ValueError(
612618
"n_components=%r must be of type int "
613619
"when greater than or equal to 1, "
614-
"was of type=%r" % (self.n_components, type(self.n_components))
620+
"was of type=%r"
621+
% (self.n_components_, type(self.n_components_))
615622
)
616623

617624
U, S, Vt = linalg.svd(mat, full_matrices=False)
@@ -623,26 +630,28 @@ def _decompose_full(self, mat):
623630
U, Vt = svd_flip(U, Vt)
624631

625632
# Get variance explained by singular values
626-
explained_variance_ = (S**2) / (self.n_samples_ - 1)
633+
explained_variance_ = (S**2) / (self.n_samples_in_ - 1)
627634
total_var = explained_variance_.sum()
628635
explained_variance_ratio_ = explained_variance_ / total_var
629636

630637
# Postprocess the number of components required
631-
if self.n_components == "mle":
632-
self.n_components = _infer_dimension(explained_variance_, self.n_samples_)
633-
elif 0 < self.n_components < 1.0:
638+
if self.n_components_ == "mle":
639+
self.n_components_ = _infer_dimension(
640+
explained_variance_, self.n_samples_in_
641+
)
642+
elif 0 < self.n_components_ < 1.0:
634643
# number of components for which the cumulated explained
635644
# variance percentage is superior to the desired threshold
636645
# side='right' ensures that number of features selected
637-
# their variance is always greater than self.n_components float
646+
# their variance is always greater than self.n_components_ float
638647
# passed. More discussion in issue: #15669
639648
ratio_cumsum = stable_cumsum(explained_variance_ratio_)
640-
self.n_components = (
641-
np.searchsorted(ratio_cumsum, self.n_components, side="right") + 1
649+
self.n_components_ = (
650+
np.searchsorted(ratio_cumsum, self.n_components_, side="right") + 1
642651
)
643-
self.n_components = self.n_components
652+
644653
return (
645-
U[:, : self.n_components],
646-
S[: self.n_components],
647-
Vt[: self.n_components],
654+
U[:, : self.n_components_],
655+
S[: self.n_components_],
656+
Vt[: self.n_components_],
648657
)

0 commit comments

Comments (0)