Skip to content

Commit 463d1f1

Browse files
committed
update WHO feature selection example
The plot has been updated with the revision of the paper: * change kernel ridge to linear regression * add a correlation plot that shows the similarity between the selector methods
1 parent 2bd2d3d commit 463d1f1

File tree

1 file changed

+46
-9
lines changed

1 file changed

+46
-9
lines changed

examples/selection/FeatureSelection-WHODataset.py

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@
99
#
1010

1111
import numpy as np
12+
import scipy
1213
from matplotlib import pyplot as plt
13-
from sklearn.kernel_ridge import KernelRidge
14+
from sklearn.linear_model import LinearRegression
1415
from sklearn.model_selection import train_test_split
1516

1617
from skmatter.datasets import load_who_dataset
@@ -57,8 +58,8 @@
5758
]
5859
)
5960

60-
columns = columns[[8, 4, 5, 6, 1, 0, 7, 3, 2]].tolist()
61-
column_names = column_names[[8, 4, 5, 6, 1, 0, 7, 3, 2]].tolist()
61+
columns = columns[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
62+
column_names = column_names[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
6263

6364
# %%
6465
#
@@ -102,9 +103,10 @@
102103

103104

104105
kernel_params = {"kernel": "rbf", "gamma": 0.08858667904100832}
105-
krr = KernelRidge(alpha=0.006158482110660267, **kernel_params)
106+
lr = LinearRegression(fit_intercept=False)
106107

107-
yp_train = krr.fit(X_train, y_train).predict(X_train)
108+
109+
yp_train = lr.fit(X_train, y_train).predict(X_train)
108110

109111
# %%
110112
#
@@ -171,8 +173,8 @@ def fit(self, X, y):
171173
for n in range(self.n_to_select):
172174
errors = np.zeros(len(remaining))
173175
for i, pp in enumerate(remaining):
174-
krr.fit(X[:, [*self.selected_idx_[:n], pp]], y)
175-
errors[i] = krr.score(X[:, [*self.selected_idx_[:n], pp]], y)
176+
lr.fit(X[:, [*self.selected_idx_[:n], pp]], y)
177+
errors[i] = lr.score(X[:, [*self.selected_idx_[:n], pp]], y)
176178
self.selected_idx_[n] = remaining[np.argmax(errors)]
177179
remaining = np.array(np.delete(remaining, np.argmax(errors)), dtype=int)
178180
return self
@@ -212,8 +214,8 @@ def fit(self, X, y):
212214
if label not in all_errors:
213215
errors = np.zeros(len(ns))
214216
for i, n in enumerate(ns):
215-
krr.fit(X_train[:, selector.selected_idx_[:n]], y_train)
216-
errors[i] = krr.score(X_test[:, selector.selected_idx_[:n]], y_test)
217+
lr.fit(X_train[:, selector.selected_idx_[:n]], y_train)
218+
errors[i] = lr.score(X_test[:, selector.selected_idx_[:n]], y_test)
217219
all_errors[label] = errors
218220
axes[0].plot(ns, all_errors[label], c=color, label=label, linestyle=linestyle)
219221
axes[1].plot(
@@ -230,3 +232,38 @@ def fit(self, X, y):
230232
axes[1].grid(axis="y", alpha=0.5)
231233
plt.tight_layout()
232234
plt.show()
235+
236+
237+
# %%
238+
#
239+
# Plot correlation between selectors
240+
# ----------------------------------
241+
242+
243+
selected_idx = np.array(
244+
[selector.selected_idx_ for selector in [cur, fps, pcur, pfps, rfa]]
245+
).T
246+
247+
weights = np.arange(9)
248+
similarity = np.zeros((len(selected_idx.T), len(selected_idx.T)))
249+
for i in range(len(selected_idx.T)):
250+
for j in range(len(selected_idx.T)):
251+
similarity[i, j] = scipy.stats.weightedtau(
252+
selected_idx[:, i], selected_idx[:, j], rank=weights
253+
)[0]
254+
255+
labels = ["CUR", "FPS", "PCovCUR", "PCovFPS,", "RFA"]
256+
257+
plt.imshow(similarity, cmap="Greens")
258+
plt.xticks(np.arange(len(labels)), labels=labels)
259+
plt.yticks(np.arange(len(labels)), labels=labels)
260+
261+
plt.title("Feature selection similarity")
262+
for i in range(len(labels)):
263+
for j in range(len(labels)):
264+
value = np.round(similarity[i, j], 2)
265+
color = "white" if value > 0.5 else "black"
266+
text = plt.gca().text(j, i, value, ha="center", va="center", color=color)
267+
268+
plt.colorbar()
269+
plt.show()

0 commit comments

Comments
 (0)