|
6 | 6 |
|
7 | 7 | using System;
|
8 | 8 | using System.Collections.Generic;
|
| 9 | +using System.IO; |
9 | 10 | using System.Linq;
|
10 | 11 | using Microsoft.ML;
|
11 | 12 | using Microsoft.ML.Data;
|
| 13 | +using TestNamespace.Model.DataModels; |
12 | 14 |
|
13 |
| -namespace TestNamespace.Train |
| 15 | +namespace TestNamespace.ConsoleApp |
14 | 16 | {
|
15 |
| - public static class ConsoleHelper |
| 17 | + public static class ModelBuilder |
16 | 18 | {
|
// Locations of the training data, the test data and the saved model file.
// Declared readonly so these configuration values cannot be reassigned after
// type initialization. MODEL_FILEPATH is a relative path; SaveModel resolves it
// through GetAbsolutePath before writing.
private static readonly string TRAIN_DATA_FILEPATH = @"x:\dummypath\dummy_train.csv";
private static readonly string TEST_DATA_FILEPATH = @"x:\dummypath\dummy_test.csv";
private static readonly string MODEL_FILEPATH = @"../../../../TestNamespace.Model/MLModel.zip";
17 | 22 |
|
18 |
| - public static void PrintRegressionMetrics(RegressionMetrics metrics) |
// Create MLContext to be shared across the model creation workflow objects.
// Set a random seed for repeatable/deterministic results across multiple trainings.
// Declared readonly so the shared instance cannot be swapped out after initialization.
private static readonly MLContext mlContext = new MLContext(seed: 1);
| 26 | + |
/// <summary>
/// Runs the model-building workflow end to end: loads the training and test
/// data, builds the training pipeline, fits it, evaluates the fitted model on
/// the test data and saves it to MODEL_FILEPATH.
/// </summary>
public static void CreateModel()
{
    // Both CSV files are read with the same options: header row, comma
    // separator, quoting allowed, sparse format allowed.
    var trainData = mlContext.Data.LoadFromTextFile<SampleObservation>(
        path: TRAIN_DATA_FILEPATH, hasHeader: true, separatorChar: ',', allowQuoting: true, allowSparse: true);

    var testData = mlContext.Data.LoadFromTextFile<SampleObservation>(
        path: TEST_DATA_FILEPATH, hasHeader: true, separatorChar: ',', allowQuoting: true, allowSparse: true);

    // Pipeline definition, then fit on the training set.
    var pipeline = BuildTrainingPipeline(mlContext);
    var trainedModel = TrainModel(mlContext, trainData, pipeline);

    // Report metrics against the held-out test set.
    EvaluateModel(mlContext, trainedModel, testData);

    // Persist the model together with the schema of the training data.
    SaveModel(mlContext, trainedModel, MODEL_FILEPATH, trainData.Schema);
}
|
30 | 55 |
|
31 |
| - public static void PrintRegressionFoldsAverageMetrics(IEnumerable<TrainCatalogBase.CrossValidationResult<RegressionMetrics>> crossValidationResults) |
/// <summary>
/// Builds the estimator chain used for training: a column concatenation with a
/// cache checkpoint, followed by a one-versus-all multiclass trainer.
/// </summary>
/// <param name="mlContext">Context whose catalogs supply the transforms and trainers.</param>
/// <returns>The data-processing steps with the trainer appended.</returns>
public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
{
    // Data preparation: concatenate "In" into "Out", then add a cache checkpoint.
    var featurization = mlContext.Transforms
        .Concatenate("Out", new[] { "In" })
        .AppendCacheCheckpoint(mlContext);

    // Training algorithm: a FastForest binary trainer wrapped in one-versus-all.
    var binaryTrainer = mlContext.BinaryClassification.Trainers.FastForest(
        labelColumnName: "Label",
        featureColumnName: "Features");
    var multiclassTrainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(
        binaryTrainer,
        labelColumnName: "Label");

    return featurization.Append(multiclassTrainer);
}
|
49 | 68 |
|
50 |
| - public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) |
/// <summary>
/// Fits <paramref name="trainingPipeline"/> to <paramref name="trainingDataView"/>,
/// writing a banner line to the console before and after the fit.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <param name="trainingPipeline">Estimator to fit.</param>
/// <returns>The transformer produced by the fit.</returns>
public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
{
    Console.WriteLine("=============== Training model ===============");
    var fittedModel = trainingPipeline.Fit(trainingDataView);
    Console.WriteLine("=============== End of training process ===============");

    return fittedModel;
}
| 78 | + |
/// <summary>
/// Applies <paramref name="mlModel"/> to <paramref name="testDataView"/>, computes
/// multiclass classification metrics from the result and prints them.
/// </summary>
/// <param name="mlContext">Context whose multiclass catalog performs the evaluation.</param>
/// <param name="mlModel">Model to evaluate.</param>
/// <param name="testDataView">Data the model is evaluated on.</param>
private static void EvaluateModel(MLContext mlContext, ITransformer mlModel, IDataView testDataView)
{
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");

    // Run the model over the test data, then evaluate using the "Label" and "Score" columns.
    IDataView scoredTestData = mlModel.Transform(testDataView);
    MulticlassClassificationMetrics testMetrics =
        mlContext.MulticlassClassification.Evaluate(scoredTestData, "Label", "Score");

    PrintMulticlassClassificationMetrics(testMetrics);
}
/// <summary>
/// Writes the trained model and its input schema to a file and reports the
/// destination on the console.
/// </summary>
/// <param name="mlContext">Context whose model catalog performs the save.</param>
/// <param name="mlModel">Model to persist.</param>
/// <param name="modelRelativePath">Destination path, resolved through GetAbsolutePath.</param>
/// <param name="modelInputSchema">Schema saved alongside the model.</param>
private static void SaveModel(MLContext mlContext, ITransformer mlModel, string modelRelativePath, DataViewSchema modelInputSchema)
{
    Console.WriteLine($"=============== Saving the model ===============");

    // Resolve the destination once; the same value is used for writing and for the message.
    string modelPath = GetAbsolutePath(modelRelativePath);

    // FileMode.Create overwrites an existing file at that path.
    using (var modelStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
    {
        mlContext.Model.Save(mlModel, modelInputSchema, modelStream);
    }

    Console.WriteLine("The model is saved to {0}", modelPath);
}
69 | 96 |
|
/// <summary>
/// Combines <paramref name="relativePath"/> with the folder that contains the
/// assembly in which <c>Program</c> is defined.
/// </summary>
/// <param name="relativePath">Path to combine with the assembly folder.</param>
/// <returns>
/// The assembly folder combined with <paramref name="relativePath"/>. The result
/// is not normalized, so any ".." segments are kept as written.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="relativePath"/> is null.</exception>
public static string GetAbsolutePath(string relativePath)
{
    if (relativePath == null)
    {
        throw new ArgumentNullException(nameof(relativePath));
    }

    // Assembly.Location is an empty string when the assembly was not loaded from
    // a file on disk (for example, from a single-file bundle), and
    // new FileInfo("") would throw. Fall back to the application's base
    // directory in that case.
    string assemblyLocation = typeof(Program).Assembly.Location;
    string assemblyFolderPath = string.IsNullOrEmpty(assemblyLocation)
        ? AppContext.BaseDirectory
        : new FileInfo(assemblyLocation).Directory.FullName;

    return Path.Combine(assemblyFolderPath, relativePath);
}
|
78 | 106 |
|
79 | 107 | public static void PrintMulticlassClassificationMetrics(MulticlassClassificationMetrics metrics)
|
|
0 commit comments