#!/usr/bin/env python

#######################################################
# Copyright (c) 2019, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
#######################################################
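
# One-vs-all logistic regression on MNIST with ArrayFire: trains one binary
# classifier per digit class with batch gradient descent, reports accuracy on
# the train and test splits, and benchmarks training and prediction times.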

from mnist_common import display_results, setup_mnist

import sys
import time

# All ArrayFire calls go through the af namespace; importing max, sum, abs,
# and eval directly from arrayfire would shadow the Python builtins.
import arrayfire as af


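# Percent of samples whose predicted class (argmax over the class columns)
# matches the target class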
def accuracy(predicted, target):
    _, tlabels = af.imax(target, 1)
    _, plabels = af.imax(predicted, 1)
    return 100 * af.count(plabels == tlabels) / tlabels.elements()


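# Mean absolute difference between predictions and targets, in percent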
def abserr(predicted, target):
    return 100 * af.sum(af.abs(predicted - target)) / predicted.elements()


# Predict (probability) based on given parameters
def predict_proba(X, Weights):
    Z = af.matmul(X, Weights)
    return af.sigmoid(Z)


# Predict (log probability) based on given parameters
def predict_log_proba(X, Weights):
    return af.log(predict_proba(X, Weights))


# Return the most likely class based on given parameters
def predict(X, Weights):
    probs = predict_proba(X, Weights)
    _, classes = af.imax(probs, 1)
    return classes


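# Regularized cross-entropy cost and gradient, computed for all classes at
# once (lambda is zeroed for the bias row below):
#   J  = (1/m) * (-sum(Y*log(H) + (1-Y)*log(1-H)) + (lambda/2)*sum(W*W))
#   dJ = (1/m) * (X^T (H - Y) + lambda*W),  with H = sigmoid(X W)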
def cost(Weights, X, Y, lambda_param=1.0):
    # Number of samples
    m = Y.dims()[0]

    # Regularization weight for each parameter
    lambdat = af.constant(lambda_param, *Weights.dims())

    # No regularization for the bias weights
    lambdat[0, :] = 0

    # Get the prediction
    H = predict_proba(X, Weights)

    # Cost of misprediction
    Jerr = -1 * af.sum(Y * af.log(H) + (1 - Y) * af.log(1 - H), dim=0)

    # Regularization cost
    Jreg = 0.5 * af.sum(lambdat * Weights * Weights, dim=0)

    # Total cost
    J = (Jerr + Jreg) / m

    # Find the gradient of the cost
    D = H - Y
    dJ = (af.matmulTN(X, D) + lambdat * Weights) / m

    return J, dJ


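# Batch gradient descent: step against the gradient, Weights -= alpha * dJ,
# until the largest per-class cost falls below maxerr or maxiter iterations
# have run.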
def train(X, Y, alpha=0.1, lambda_param=1.0, maxerr=0.01, maxiter=1000, verbose=False):
    # Initialize parameters to 0
    Weights = af.constant(0, X.dims()[1], Y.dims()[1])

    for i in range(maxiter):
        # Get the cost and gradient
        J, dJ = cost(Weights, X, Y, lambda_param)

        err = af.max(af.abs(J))
        if err < maxerr:
            print('Iteration {0:4d} Err: {1:.4f}'.format(i + 1, err))
            print('Training converged')
            return Weights

        if verbose and ((i + 1) % 10 == 0):
            print('Iteration {0:4d} Err: {1:.4f}'.format(i + 1, err))

        # Update the parameters via gradient descent
        Weights = Weights - alpha * dJ

    if verbose:
        print('Training stopped after {0:d} iterations'.format(maxiter))

    return Weights


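# ArrayFire builds arrays lazily, so af.eval() forces the computation and
# af.sync() blocks until the device has finished; without them the timers
# below would only measure kernel queuing, not the actual work.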
def benchmark_logistic_regression(train_feats, train_targets, test_feats):
    t0 = time.time()
    Weights = train(train_feats, train_targets, 0.1, 1.0, 0.01, 1000)
    af.eval(Weights)
    af.sync()
    t1 = time.time()
    dt = t1 - t0
    print('Training time: {0:4.4f} s'.format(dt))

    t0 = time.time()
    iters = 100
    for i in range(iters):
        test_outputs = predict(test_feats, Weights)
        af.eval(test_outputs)
    af.sync()
    t1 = time.time()
    dt = t1 - t0
    print('Prediction time: {0:4.4f} s'.format(dt / iters))


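# With one-hot targets, each column of Weights is an independent binary
# classifier; predict() picks the class via the argmax across columns.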
# Demo of one vs all logistic regression
def logit_demo(console, perc):
    # Load mnist data
    frac = float(perc) / 100.0
    (num_classes, num_train, num_test,
     train_images, test_images,
     train_targets, test_targets) = setup_mnist(frac, True)

    # Reshape images into feature vectors
    feature_length = train_images.elements() // num_train
    train_feats = af.transpose(af.moddims(train_images, feature_length, num_train))
    test_feats = af.transpose(af.moddims(test_images, feature_length, num_test))

    train_targets = af.transpose(train_targets)
    test_targets = af.transpose(test_targets)

    num_train = train_feats.dims()[0]
    num_test = test_feats.dims()[0]

    # Add a bias input that is always 1
    train_bias = af.constant(1, num_train, 1)
    test_bias = af.constant(1, num_test, 1)
    train_feats = af.join(1, train_bias, train_feats)
    test_feats = af.join(1, test_bias, test_feats)

    # Train logistic regression parameters
    Weights = train(train_feats, train_targets,
                    0.1,    # learning rate (alpha)
                    1.0,    # regularization constant (lambda)
                    0.01,   # max error
                    1000,   # max iterations
                    True)   # verbose
    af.eval(Weights)
    af.sync()

    # Predict the results
    train_outputs = predict_proba(train_feats, Weights)
    test_outputs = predict_proba(test_feats, Weights)

    print('Accuracy on training data: {0:2.2f}'.format(accuracy(train_outputs, train_targets)))
    print('Accuracy on testing data: {0:2.2f}'.format(accuracy(test_outputs, test_targets)))
    print('Mean absolute error on testing data: {0:2.2f}'.format(abserr(test_outputs, test_targets)))

    benchmark_logistic_regression(train_feats, train_targets, test_feats)

    if not console:
        test_outputs = af.transpose(test_outputs)
        # Show results for 20 random test images
        display_results(test_images, test_outputs, af.transpose(test_targets), 20, True)


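# Command line: logistic_regression.py [device] [console] [perc]
#   device   ArrayFire device index (default 0)
#   console  any argument starting with '-' skips the image display
#   perc     percentage of the MNIST set to load (default 60)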
def main():
    argc = len(sys.argv)

    device = int(sys.argv[1]) if argc > 1 else 0
    console = sys.argv[2][0] == '-' if argc > 2 else False
    perc = int(sys.argv[3]) if argc > 3 else 60

    try:
        af.set_device(device)
        af.info()
        logit_demo(console, perc)
    except Exception as e:
        print('Error:', str(e))


if __name__ == '__main__':
    main()