Skip to content

Commit dacd24e

Browse files
daholste authored and Dmitry-A committed
[AutoML] Add AutoML example code (dotnet#3458)
1 parent d2f8fca commit dacd24e

16 files changed

+402
-6
lines changed

Microsoft.ML.AutoML.sln

+14
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.cs
1010
EndProject
1111
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}"
1212
EndProject
13+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples", "docs\samples\Microsoft.ML.AutoML.Samples\Microsoft.ML.AutoML.Samples.csproj", "{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}"
14+
EndProject
1315
Global
1416
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1517
Debug|Any CPU = Debug|Any CPU
@@ -68,6 +70,18 @@ Global
6870
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
6971
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
7072
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
73+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
74+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.Build.0 = Debug|Any CPU
75+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
76+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
77+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
78+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
79+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.ActiveCfg = Release|Any CPU
80+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.Build.0 = Release|Any CPU
81+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
82+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
83+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
84+
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
7185
EndGlobalSection
7286
GlobalSection(SolutionProperties) = preSolution
7387
HideSolutionNode = FALSE
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using System;
2+
using System.IO;
3+
using System.Linq;
4+
using Microsoft.ML.Auto;
5+
using Microsoft.ML.Data;
6+
7+
namespace Microsoft.ML.AutoML.Samples
8+
{
9+
/// <summary>
/// Sample that runs an AutoML binary classification experiment on a sentiment
/// dataset, prints metrics for the best run, saves the best model, and makes a
/// single prediction with it.
/// </summary>
public static class BinaryClassificationExperiment
{
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";

    // Path.Combine uses the host platform's directory separator. A hard-coded '\'
    // is an ordinary file-name character on non-Windows systems, so the model
    // would not land in the intended directory there.
    private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "SentimentModel.zip");

    // Value handed to CreateBinaryClassificationExperiment; reported to the user in seconds.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the whole sample end to end: load data, run the experiment, report
    /// validation and test metrics, save the best model, and predict once.
    /// Waits for a key press before returning.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
        IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

        // STEP 2: Run AutoML experiment
        // No label column name is passed to Execute here, unlike the other samples
        // (presumably the library's default label column name applies - confirm).
        Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto()
            .CreateBinaryClassificationExperiment(ExperimentTime)
            .Execute(trainDataView);

        // STEP 3: Print metric from the best model
        RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model);

        // STEP 7: Initialize a new sentiment issue, and get the predicted sentiment
        var testSentimentIssue = new SentimentIssue
        {
            Text = "I hope this helps."
        };
        var prediction = predictionEngine.Predict(testSentimentIssue);
        Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes each binary classification metric to the console, one per line.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(BinaryClassificationMetrics metrics)
    {
        Console.WriteLine($"Accuracy: {metrics.Accuracy}");
        Console.WriteLine($"AreaUnderPrecisionRecallCurve: {metrics.AreaUnderPrecisionRecallCurve}");
        Console.WriteLine($"AreaUnderRocCurve: {metrics.AreaUnderRocCurve}");
        Console.WriteLine($"F1Score: {metrics.F1Score}");
        Console.WriteLine($"NegativePrecision: {metrics.NegativePrecision}");
        Console.WriteLine($"NegativeRecall: {metrics.NegativeRecall}");
        Console.WriteLine($"PositivePrecision: {metrics.PositivePrecision}");
        Console.WriteLine($"PositiveRecall: {metrics.PositiveRecall}");
    }
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Input row for the multiclass sample: a 64-element pixel vector plus one
/// numeric column.
/// </summary>
public class PixelData
{
    /// <summary>Vector of 64 values loaded from columns 0 through 63.</summary>
    [LoadColumn(0, 63), VectorType(64)]
    public float[] PixelValues;

    /// <summary>
    /// Value loaded from column 64. The multiclass sample passes "Number" as
    /// its label column name.
    /// </summary>
    [LoadColumn(64)] public float Number;
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Prediction output type used with <c>PixelData</c> in the multiclass sample.
/// </summary>
public class PixelPrediction
{
    /// <summary>Bound to the "PredictedLabel" column rather than a column named after the field.</summary>
    [ColumnName("PredictedLabel")] public float Prediction;
}
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Input row for the binary classification sample: a boolean label and the
/// text it belongs to.
/// </summary>
public class SentimentIssue
{
    /// <summary>Boolean value loaded from column 0.</summary>
    [LoadColumn(0)] public bool Label { get; set; }

    /// <summary>Text loaded from column 1.</summary>
    [LoadColumn(1)] public string Text { get; set; }
}
13+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Prediction output type used with <c>SentimentIssue</c> in the binary
/// classification sample.
/// </summary>
public class SentimentPrediction
{
    /// <summary>
    /// Predicted value. Without the ColumnName attribute the column name would
    /// default to the member name; here it is mapped to "PredictedLabel".
    /// </summary>
    [ColumnName("PredictedLabel")] public bool Prediction { get; set; }

    /// <summary>Bound to the column named "Score" (no ColumnName override).</summary>
    public float Score { get; set; }
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Input row for the regression sample. Each field is loaded from the column
/// index given in its LoadColumn attribute.
/// </summary>
public class TaxiTrip
{
    /// <summary>Column 0.</summary>
    [LoadColumn(0)] public string VendorId;

    /// <summary>Column 1.</summary>
    [LoadColumn(1)] public float RateCode;

    /// <summary>Column 2.</summary>
    [LoadColumn(2)] public float PassengerCount;

    /// <summary>Column 3.</summary>
    [LoadColumn(3)] public float TripTimeInSeconds;

    /// <summary>Column 4.</summary>
    [LoadColumn(4)] public float TripDistance;

    /// <summary>Column 5.</summary>
    [LoadColumn(5)] public string PaymentType;

    /// <summary>Column 6. The regression sample passes "FareAmount" as its label column name.</summary>
    [LoadColumn(6)] public float FareAmount;
}
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using Microsoft.ML.Data;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Prediction output type used with <c>TaxiTrip</c> in the regression sample.
/// </summary>
public class TaxiTripFarePrediction
{
    /// <summary>Bound to the "Score" column rather than a column named after the field.</summary>
    [ColumnName("Score")] public float FareAmount;
}
10+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!-- Project file for the AutoML samples; builds an executable targeting netcoreapp2.1. -->
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>netcoreapp2.1</TargetFramework>
6+
</PropertyGroup>
7+
8+
<ItemGroup>
9+
<!-- References the Microsoft.ML.Auto project by relative path instead of a package. -->
<ProjectReference Include="..\..\..\src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj" />
10+
</ItemGroup>
11+
12+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
using System;
2+
using System.IO;
3+
using System.Linq;
4+
using Microsoft.ML.Auto;
5+
using Microsoft.ML.Data;
6+
7+
namespace Microsoft.ML.AutoML.Samples
8+
{
9+
/// <summary>
/// Sample that runs an AutoML multiclass classification experiment on pixel
/// data, prints metrics for the best run, saves the best model, and makes a
/// single prediction with it.
/// </summary>
public static class MulticlassClassificationExperiment
{
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";

    // Path.Combine uses the host platform's directory separator. A hard-coded '\'
    // is an ordinary file-name character on non-Windows systems, so the model
    // would not land in the intended directory there.
    private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "OptDigitsModel.zip");

    // Name of the label column; matches the PixelData.Number field.
    private static readonly string LabelColumnName = "Number";

    // Value handed to CreateMulticlassClassificationExperiment; reported to the user in seconds.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the whole sample end to end: load data, run the experiment, report
    /// validation and test metrics, save the best model, and predict once.
    /// Waits for a key press before returning.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<PixelData>(TrainDataPath, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<PixelData>(TestDataPath, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainDataView, LabelColumnName);

        // STEP 3: Print metric from the best model
        RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        MulticlassClassificationMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<PixelData, PixelPrediction>(bestRun.Model);

        // STEP 7: Initialize new pixel data, and get the predicted number
        var testPixelData = new PixelData
        {
            PixelValues = new float[] { 0, 0, 1, 8, 15, 10, 0, 0, 0, 3, 13, 15, 14, 14, 0, 0, 0, 5, 10, 0, 10, 12, 0, 0, 0, 0, 3, 5, 15, 10, 2, 0, 0, 0, 16, 16, 16, 16, 12, 0, 0, 1, 8, 12, 14, 8, 3, 0, 0, 0, 0, 10, 13, 0, 0, 0, 0, 0, 0, 11, 9, 0, 0, 0 }
        };
        var prediction = predictionEngine.Predict(testPixelData);
        Console.WriteLine($"Predicted number for test pixels: {prediction.Prediction}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes each multiclass classification metric to the console, one per line.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(MulticlassClassificationMetrics metrics)
    {
        Console.WriteLine($"LogLoss: {metrics.LogLoss}");
        Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}");
        Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy}");
        Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");
    }
}
71+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using System;
2+
3+
namespace Microsoft.ML.AutoML.Samples
4+
{
5+
/// <summary>
/// Entry point that runs the regression, binary classification, and multiclass
/// classification samples one after another.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs each sample in order, clearing the console after each one. Any
    /// exception is printed instead of propagating. Waits for a line of input
    /// before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        try
        {
            Action[] samples =
            {
                RegressionExperiment.Run,
                BinaryClassificationExperiment.Run,
                MulticlassClassificationExperiment.Run,
            };

            foreach (Action runSample in samples)
            {
                runSample();
                Console.Clear();
            }

            Console.WriteLine("Done");
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Exception {ex}");
        }

        Console.ReadLine();
    }
}
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
using System;
2+
using System.IO;
3+
using System.Linq;
4+
using Microsoft.ML.Auto;
5+
using Microsoft.ML.Data;
6+
7+
namespace Microsoft.ML.AutoML.Samples
8+
{
9+
/// <summary>
/// Sample that runs an AutoML regression experiment on taxi trip data, prints
/// metrics for the best run, saves the best model, and makes a single
/// prediction with it.
/// </summary>
public static class RegressionExperiment
{
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";

    // Path.Combine uses the host platform's directory separator. A hard-coded '\'
    // is an ordinary file-name character on non-Windows systems, so the model
    // would not land in the intended directory there.
    private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "TaxiFareModel.zip");

    // Name of the label column; matches the TaxiTrip.FareAmount field.
    private static readonly string LabelColumnName = "FareAmount";

    // Value handed to CreateRegressionExperiment; reported to the user in seconds.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the whole sample end to end: load data, run the experiment, report
    /// validation and test metrics, save the best model, and predict once.
    /// Waits for a key press before returning.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
        ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
            .CreateRegressionExperiment(ExperimentTime)
            .Execute(trainDataView, LabelColumnName);

        // STEP 3: Print metric from best model
        RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

        // STEP 7: Initialize a new test taxi trip, and get the predicted fare
        // FareAmount is left unset on the input; it is the value being predicted.
        var testTaxiTrip = new TaxiTrip
        {
            VendorId = "VTS",
            RateCode = 1,
            PassengerCount = 1,
            TripTimeInSeconds = 1140,
            TripDistance = 3.75f,
            PaymentType = "CRD"
        };
        var prediction = predictionEngine.Predict(testTaxiTrip);
        Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes each regression metric to the console, one per line.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}");
        Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
76+
}

0 commit comments

Comments
 (0)