@@ -9,8 +9,9 @@
 #
 
 import numpy as np
+import scipy
 from matplotlib import pyplot as plt
-from sklearn.kernel_ridge import KernelRidge
+from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
 
 from skmatter.datasets import load_who_dataset
@@ -57,8 +58,8 @@
 ]
 )
 
-columns = columns[[8, 4, 5, 6, 1, 0, 7, 3, 2]].tolist()
-column_names = column_names[[8, 4, 5, 6, 1, 0, 7, 3, 2]].tolist()
+columns = columns[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
+column_names = column_names[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
 
 # %%
 #
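The reordering above is plain NumPy fancy indexing: indexing an array with an integer list returns the elements in exactly that order. A tiny illustration with made-up values, not the WHO columns:

import numpy as np

names = np.array(["a", "b", "c"])
print(names[[2, 0, 1]].tolist())  # ['c', 'a', 'b']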
@@ -102,9 +103,10 @@
 
 
 kernel_params = {"kernel": "rbf", "gamma": 0.08858667904100832}
-krr = KernelRidge(alpha=0.006158482110660267, **kernel_params)
+lr = LinearRegression(fit_intercept=False)
 
-yp_train = krr.fit(X_train, y_train).predict(X_train)
+
+yp_train = lr.fit(X_train, y_train).predict(X_train)
 
 # %%
 #
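A side note on the swap above: dropping the intercept (`fit_intercept=False`) is only harmless when features and targets are centered. Assuming the WHO data are standardized earlier in the example (that preprocessing is not shown in this diff), the ordinary-least-squares intercept is zero anyway. A minimal sketch of that fact:

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
X -= X.mean(axis=0)                    # zero-mean columns
y = X @ np.array([1.0, -2.0, 0.5])
y -= y.mean()

lr = LinearRegression().fit(X, y)
print(np.isclose(lr.intercept_, 0.0))  # True: the intercept vanishes for centered data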
@@ -171,8 +173,8 @@ def fit(self, X, y):
         for n in range(self.n_to_select):
             errors = np.zeros(len(remaining))
             for i, pp in enumerate(remaining):
-                krr.fit(X[:, [*self.selected_idx_[:n], pp]], y)
-                errors[i] = krr.score(X[:, [*self.selected_idx_[:n], pp]], y)
+                lr.fit(X[:, [*self.selected_idx_[:n], pp]], y)
+                errors[i] = lr.score(X[:, [*self.selected_idx_[:n], pp]], y)
             self.selected_idx_[n] = remaining[np.argmax(errors)]
             remaining = np.array(np.delete(remaining, np.argmax(errors)), dtype=int)
         return self
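This hunk is the heart of the recursive feature addition: for each candidate feature `pp`, the model is refit on the already-selected columns plus `pp`, and the candidate with the highest R² (`lr.score`) is kept. A self-contained sketch of the same greedy search; the helper name `greedy_select` is hypothetical, not part of the example:

import numpy as np
from sklearn.linear_model import LinearRegression

def greedy_select(X, y, n_to_select):
    # At each step, keep whichever remaining feature maximizes the
    # training R^2 of a linear fit on the features chosen so far.
    lr = LinearRegression(fit_intercept=False)
    selected, remaining = [], list(range(X.shape[1]))
    for _ in range(n_to_select):
        scores = [
            lr.fit(X[:, selected + [p]], y).score(X[:, selected + [p]], y)
            for p in remaining
        ]
        selected.append(remaining.pop(int(np.argmax(scores))))
    return np.array(selected)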
@@ -212,8 +214,8 @@ def fit(self, X, y):
     if label not in all_errors:
         errors = np.zeros(len(ns))
         for i, n in enumerate(ns):
-            krr.fit(X_train[:, selector.selected_idx_[:n]], y_train)
-            errors[i] = krr.score(X_test[:, selector.selected_idx_[:n]], y_test)
+            lr.fit(X_train[:, selector.selected_idx_[:n]], y_train)
+            errors[i] = lr.score(X_test[:, selector.selected_idx_[:n]], y_test)
         all_errors[label] = errors
     axes[0].plot(ns, all_errors[label], c=color, label=label, linestyle=linestyle)
     axes[1].plot(
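The loop above evaluates each selector by training on the first n selected features and scoring R² on a held-out split, memoizing results per label so a selector is not re-scored. A minimal standalone version, assuming a fitted selector exposing `selected_idx_` as in the hunk; `prefix_scores` is an illustrative name:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

def prefix_scores(selected_idx, X, y, ns):
    # R^2 on held-out data using only the first n selected features.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
    lr = LinearRegression(fit_intercept=False)
    return np.array([
        lr.fit(X_tr[:, selected_idx[:n]], y_tr).score(X_te[:, selected_idx[:n]], y_te)
        for n in ns
    ])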
@@ -230,3 +232,38 @@ def fit(self, X, y):
 axes[1].grid(axis="y", alpha=0.5)
 plt.tight_layout()
 plt.show()
+
+
+# %%
+#
+# Plot correlation between selectors
+# ----------------------------------
+
+
+selected_idx = np.array(
+    [selector.selected_idx_ for selector in [cur, fps, pcur, pfps, rfa]]
+).T
+
+weights = np.arange(9)
+similarity = np.zeros((len(selected_idx.T), len(selected_idx.T)))
+for i in range(len(selected_idx.T)):
+    for j in range(len(selected_idx.T)):
+        similarity[i, j] = scipy.stats.weightedtau(
+            selected_idx[:, i], selected_idx[:, j], rank=weights
+        )[0]
+
+labels = ["CUR", "FPS", "PCovCUR", "PCovFPS", "RFA"]
+
+plt.imshow(similarity, cmap="Greens")
+plt.xticks(np.arange(len(labels)), labels=labels)
+plt.yticks(np.arange(len(labels)), labels=labels)
+
+plt.title("Feature selection similarity")
+for i in range(len(labels)):
+    for j in range(len(labels)):
+        value = np.round(similarity[i, j], 2)
+        color = "white" if value > 0.5 else "black"
+        plt.gca().text(j, i, value, ha="center", va="center", color=color)
+
+plt.colorbar()
+plt.show()
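The new similarity matrix uses a weighted Kendall rank correlation: `rank=weights` with `weights = np.arange(9)` assigns rank 0 to the first-selected feature, and SciPy's default hyperbolic weigher 1/(1 + rank) then makes disagreements among the earliest picks count most. A tiny sketch mirroring that call, with made-up selection orders:

import numpy as np
import scipy.stats

# Two hypothetical selection orders over 9 features: the feature index
# picked at each step. They agree on the top picks and diverge later.
a = np.array([8, 4, 2, 6, 1, 7, 0, 5, 3])
b = np.array([8, 4, 6, 2, 1, 7, 0, 3, 5])

# rank=np.arange(9) ranks positions by selection step, so the default
# weigher 1/(1 + rank) emphasizes the earliest selections.
tau, _ = scipy.stats.weightedtau(a, b, rank=np.arange(9))
print(tau)  # close to 1: the orders mostly agree, especially early on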