 * computed using stochastic gradient descent method.
 */

+ #include <array>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <numeric>
#include <vector>

- #define MAX_ITER 500  // INT_MAX ///< Maximum number of iterations to learn
+ /** Maximum number of iterations to learn */
+ constexpr int MAX_ITER = 500;  // INT_MAX

/**
 * \namespace machine_learning
 * \brief Machine learning algorithms
@@ -50,8 +52,8 @@ class adaline {
     * \param[in] accuracy convergence accuracy (optional,
     * default=\f$1\times10^{-5}\f$)
     */
-     adaline(int num_features, const double eta = 0.01f,
-             const double accuracy = 1e-5)
+     explicit adaline(int num_features, const double eta = 0.01f,
+                      const double accuracy = 1e-5)
        : eta(eta), accuracy(accuracy) {
        if (eta <= 0) {
            std::cerr << "learning rate should be positive and nonzero"
@@ -64,7 +66,7 @@ class adaline {
            1);  // additional weight is for the constant bias term

        // initialize weights to 1 (random values in [-50, 49] are an
        // alternative, left commented out below)
-         for (int i = 0; i < weights.size(); i++) weights[i] = 1.f;
+         for (double &weight : weights) weight = 1.f;
        // weights[i] = (static_cast<double>(std::rand() % 100) - 50);
    }
@@ -75,8 +77,9 @@ class adaline {
        out << "<";
        for (int i = 0; i < ada.weights.size(); i++) {
            out << ada.weights[i];
-             if (i < ada.weights.size() - 1)
+             if (i < ada.weights.size() - 1) {
                out << ", ";
+             }
        }
        out << ">";
        return out;
@@ -90,28 +93,33 @@ class adaline {
     * model prediction output
     */
    int predict(const std::vector<double> &x, double *out = nullptr) {
-         if (!check_size_match(x))
+         if (!check_size_match(x)) {
            return 0;
+         }

        double y = weights.back();  // assign bias value

        // for (int i = 0; i < x.size(); i++) y += x[i] * weights[i];
        y = std::inner_product(x.begin(), x.end(), weights.begin(), y);

-         if (out != nullptr)  // if out variable is provided
+         if (out != nullptr) {  // if out variable is provided
            *out = y;
+         }

        return activation(y);  // quantizer: apply ADALINE threshold function
    }
    /**
     * Update the weights of the model using supervised learning for one
-      * feature vector \param[in] x feature vector \param[in] y known output
-      * value \returns correction factor
+      * feature vector
+      * \param[in] x feature vector
+      * \param[in] y known output value
+      * \returns correction factor
     */
    double fit(const std::vector<double> &x, const int &y) {
-         if (!check_size_match(x))
+         if (!check_size_match(x)) {
            return 0;
+         }

        /* output of the model with current weights */
        int p = predict(x);
@@ -129,21 +137,23 @@ class adaline {
    /**
     * Update the weights of the model using supervised learning for an
-      * array of vectors. \param[in] X array of feature vector \param[in] y
-      * known output value for each feature vector
+      * array of vectors.
+      * \param[in] X array of feature vectors
+      * \param[in] Y known output value for each feature vector
     */
-     template <int N>
-     void fit(std::vector<double> const (&X)[N], const int *y) {
+     template <size_t N>
+     void fit(std::array<std::vector<double>, N> const &X,
+              std::array<int, N> const &Y) {
        double avg_pred_error = 1.f;

-         int iter;
+         int iter = 0;
        for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy);
             iter++) {
            avg_pred_error = 0.f;

            // perform fit for each sample
            for (int i = 0; i < N; i++) {
-                 double err = fit(X[i], y[i]);
+                 double err = fit(X[i], Y[i]);
                avg_pred_error += std::abs(err);
            }
            avg_pred_error /= N;
@@ -154,15 +164,25 @@ class adaline {
                      << "\tAvg error: " << avg_pred_error << std::endl;
        }

-         if (iter < MAX_ITER)
-
+         if (iter < MAX_ITER) {
            std::cout << "Converged after " << iter << " iterations."
                      << std::endl;
-         else
+         } else {
            std::cout << "Did not converge after " << iter << " iterations."
                      << std::endl;
+         }
    }

+     /** Defines activation function as Heaviside's step function.
+      * \f[
+      * f(x) = \begin{cases}
+      * -1 & \forall x \le 0\\
+      *  1 & \forall x > 0
+      * \end{cases}
+      * \f]
+      * @param x input value to apply activation on
+      * @return activation output
+      */
    int activation(double x) { return x > 0 ? 1 : -1; }

 private:
@@ -206,15 +226,19 @@ void test1(double eta = 0.01) {
    const int N = 10;  // number of sample points

-     std::vector<double> X[N] = {{0, 1}, {1, -2}, {2, 3}, {3, -1},
-                                 {4, 1}, {6, -5}, {-7, -3}, {-8, 5},
-                                 {-9, 2}, {-10, -15}};
-     int y[] = {1, -1, 1, -1, -1, -1, 1, 1, 1, -1};  // corresponding y-values
+     std::array<std::vector<double>, N> X = {
+         std::vector<double>({0, 1}),   std::vector<double>({1, -2}),
+         std::vector<double>({2, 3}),   std::vector<double>({3, -1}),
+         std::vector<double>({4, 1}),   std::vector<double>({6, -5}),
+         std::vector<double>({-7, -3}), std::vector<double>({-8, 5}),
+         std::vector<double>({-9, 2}),  std::vector<double>({-10, -15})};
+     std::array<int, N> y = {1, -1, 1, -1, -1,
+                             -1, 1, 1, 1, -1};  // corresponding y-values

    std::cout << "------- Test 1 -------" << std::endl;
    std::cout << "Model before fit: " << ada << std::endl;

-     ada.fit(X, y);
+     ada.fit<N>(X, y);
    std::cout << "Model after fit: " << ada << std::endl;

    int predict = ada.predict({5, -3});
@@ -240,17 +264,17 @@ void test2(double eta = 0.01) {
    const int N = 50;  // number of sample points

-     std::vector<double> X[N];
-     int Y[N];  // corresponding y-values
+     std::array<std::vector<double>, N> X;
+     std::array<int, N> Y{};  // corresponding y-values

    // generate sample points in the interval
    // [-range2/100, (range2-1)/100]
    int range = 500;          // sample points full-range
    int range2 = range >> 1;  // sample points half-range
    for (int i = 0; i < N; i++) {
-         double x0 = ((std::rand() % range) - range2) / 100.f;
-         double x1 = ((std::rand() % range) - range2) / 100.f;
-         X[i] = {x0, x1};
+         double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         X[i] = std::vector<double>({x0, x1});
        Y[i] = (x0 + 3. * x1) > -1 ? 1 : -1;
    }
@@ -262,8 +286,8 @@ void test2(double eta = 0.01) {
    int N_test_cases = 5;
    for (int i = 0; i < N_test_cases; i++) {
-         double x0 = ((std::rand() % range) - range2) / 100.f;
-         double x1 = ((std::rand() % range) - range2) / 100.f;
+         double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        int predict = ada.predict({x0, x1});
@@ -291,18 +315,18 @@ void test3(double eta = 0.01) {
    const int N = 100;  // number of sample points

-     std::vector<double> X[N];
-     int Y[N];  // corresponding y-values
+     std::array<std::vector<double>, N> X;
+     std::array<int, N> Y{};  // corresponding y-values

    // generate sample points in the interval
    // [-range2/100, (range2-1)/100]
    int range = 200;          // sample points full-range
    int range2 = range >> 1;  // sample points half-range
    for (int i = 0; i < N; i++) {
-         double x0 = ((std::rand() % range) - range2) / 100.f;
-         double x1 = ((std::rand() % range) - range2) / 100.f;
-         double x2 = ((std::rand() % range) - range2) / 100.f;
-         X[i] = {x0, x1, x2, x0 * x0, x1 * x1, x2 * x2};
+         double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x2 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         X[i] = std::vector<double>({x0, x1, x2, x0 * x0, x1 * x1, x2 * x2});
        Y[i] = ((x0 * x0) + (x1 * x1) + (x2 * x2)) <= 1.f ? 1 : -1;
    }
@@ -314,9 +338,9 @@ void test3(double eta = 0.01) {
    int N_test_cases = 5;
    for (int i = 0; i < N_test_cases; i++) {
-         double x0 = ((std::rand() % range) - range2) / 100.f;
-         double x1 = ((std::rand() % range) - range2) / 100.f;
-         double x2 = ((std::rand() % range) - range2) / 100.f;
+         double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;
+         double x2 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        int predict = ada.predict({x0, x1, x2, x0 * x0, x1 * x1, x2 * x2});
@@ -334,8 +358,9 @@ int main(int argc, char **argv) {
    std::srand(std::time(nullptr));  // initialize random number generator

    double eta = 0.1;  // default value of eta
-     if (argc == 2)  // read eta value from commandline argument if present
+     if (argc == 2) {  // read eta value from commandline argument if present
        eta = strtof(argv[1], nullptr);
+     }

    test1(eta);
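
The weight update performed inside the single-sample fit() falls outside the hunks shown above. For reference, below is a minimal, self-contained sketch of the classic ADALINE delta rule that a fit() like this one typically implements, using the same bias-as-last-weight convention as predict(); the names sign and delta_rule_step are hypothetical and do not appear in this file, and the real implementation may differ in what it returns.

// Standalone sketch of one ADALINE delta-rule step (an assumption, not
// taken verbatim from this file). Convention: weights.back() is the bias
// term, matching predict() above.
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

// Heaviside-style quantizer, same thresholding as activation() above
int sign(double y) { return y > 0 ? 1 : -1; }

// One stochastic-gradient update: w_i <- w_i + eta * (y - y_hat) * x_i,
// bias <- bias + eta * (y - y_hat). Returns the prediction error y - y_hat.
double delta_rule_step(std::vector<double> &weights,
                       const std::vector<double> &x, int y, double eta) {
    double net = std::inner_product(x.begin(), x.end(), weights.begin(),
                                    weights.back());  // w . x + bias
    double prediction_error = y - sign(net);
    for (std::size_t i = 0; i < x.size(); i++) {
        weights[i] += eta * prediction_error * x[i];
    }
    weights.back() += eta * prediction_error;  // bias input is implicitly 1
    return prediction_error;
}

int main() {
    std::vector<double> w = {1., 1., 1.};  // two feature weights + bias
    double err = delta_rule_step(w, {2., 3.}, -1, 0.01);
    std::cout << "prediction error: " << err << '\n';
    return 0;
}

Note that the quantized output, rather than the raw net input, drives the error term in this sketch, mirroring the predict()/activation() pairing in the class; the textbook ADALINE variant instead uses the continuous net output in the update.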