dotnet · daholste · Apr 25, 2019 · Apr 20, 2019 · Apr 22, 2019 · Apr 22, 2019
diff --git a/Microsoft.ML.AutoML.sln b/Microsoft.ML.AutoML.sln
@@ -10,6 +10,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.cs
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}"
 EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples", "docs\samples\Microsoft.ML.AutoML.Samples\Microsoft.ML.AutoML.Samples.csproj", "{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -68,6 +70,18 @@ Global
 		{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
 		{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
 		{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.Build.0 = Release|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+		{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/BinaryClassificationExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/BinaryClassificationExperiment.cs
@@ -0,0 +1,74 @@
+using System;
+using System.IO;
+using System.Linq;
+using Microsoft.ML.Auto;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Sample: runs an AutoML binary classification experiment, evaluates the best
+    /// model on a test set, saves it, and scores one hand-written example.
+    /// </summary>
+    public static class BinaryClassificationExperiment
+    {
+        // Relative folder that holds the data files and receives the saved model.
+        private const string BaseDatasetsLocation = "Data";
+        private static readonly string TrainDataPath = Path.Combine(BaseDatasetsLocation, "wikipedia-detox-250-line-data.tsv");
+        private static readonly string TestDataPath = Path.Combine(BaseDatasetsLocation, "wikipedia-detox-250-line-test.tsv");
+        private static readonly string ModelPath = Path.Combine(BaseDatasetsLocation, "SentimentModel.zip");
+
+        // Passed to CreateBinaryClassificationExperiment; the progress message reports it as seconds.
+        private const uint ExperimentTime = 60;
+
+        /// <summary>
+        /// Loads the train and test data, runs the experiment, prints metrics, saves the
+        /// best model, predicts one example, and then waits for a key press.
+        /// </summary>
+        public static void Run()
+        {
+            MLContext mlContext = new MLContext();
+
+            // STEP 1: Load data
+            IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
+            IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);
+
+            // STEP 2: Run AutoML experiment
+            Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
+            ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto()
+                .CreateBinaryClassificationExperiment(ExperimentTime)
+                .Execute(trainDataView);
+
+            // STEP 3: Print metric from the best model
+            RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
+            Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
+            Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
+            Console.WriteLine($"Accuracy of best model from validation data: {bestRun.ValidationMetrics.Accuracy}");
+
+            // STEP 4: Evaluate test data
+            IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
+            BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore);
+            Console.WriteLine($"Accuracy of best model on test data: {testMetrics.Accuracy}");
+
+            // STEP 5: Save the best model for later deployment and inferencing
+            using (FileStream fs = File.Create(ModelPath))
+            {
+                mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
+            }
+
+            // STEP 6: Create prediction engine from the best trained model
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model);
+
+            // STEP 7: Initialize a new sentiment issue, and get the predicted sentiment
+            var testSentimentIssue = new SentimentIssue
+            {
+                Text = "I hope this helps."
+            };
+            var prediction = predictionEngine.Predict(testSentimentIssue);
+            Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");
+
+            Console.WriteLine("Press any key to continue...");
+            Console.ReadKey();
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelData.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelData.cs
@@ -0,0 +1,18 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Input row for the multiclass sample: a 64-element pixel vector followed by the label.
+    /// </summary>
+    public class PixelData
+    {
+        // Columns 0 through 63 of the file are loaded together as one fixed-size vector.
+        [LoadColumn(0, 63), VectorType(64)]
+        public float[] PixelValues;
+
+        // Column 64; MulticlassClassificationExperiment passes "Number" as its label column.
+        [LoadColumn(64)]
+        public float Number;
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelPrediction.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelPrediction.cs
@@ -0,0 +1,13 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Prediction output type used with <see cref="PixelData"/> in the multiclass sample.
+    /// </summary>
+    public class PixelPrediction
+    {
+        // Bound to the "PredictedLabel" column rather than a column named after the field.
+        [ColumnName("PredictedLabel")] public float Prediction;
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentIssue.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentIssue.cs
@@ -0,0 +1,14 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Input row for the binary classification sample, loaded from columns 0 and 1 of the data file.
+    /// </summary>
+    public class SentimentIssue
+    {
+        [LoadColumn(0)] public bool Label { get; set; }
+
+        [LoadColumn(1)] public string Text { get; set; }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentPrediction.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentPrediction.cs
@@ -0,0 +1,18 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    public class SentimentPrediction
+    {
+        // ColumnName attribute is used to change the column name from
+        // its default value, which is the name of the property.
+        [ColumnName("PredictedLabel")]
+        public bool Prediction { get; set; }
+
+        // No ColumnName attribute needed: the property name "Probability" is already the column name.
+        // NOTE(review): presumably only calibrated models emit "Probability" - confirm every AutoML trainer does.
+        public float Probability { get; set; }
+
+        public float Score { get; set; }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTrip.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTrip.cs
@@ -0,0 +1,21 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Input row for the regression sample; each field is loaded from the
+    /// column index given in its LoadColumn attribute.
+    /// </summary>
+    public class TaxiTrip
+    {
+        [LoadColumn(0)] public string VendorId;
+        [LoadColumn(1)] public float RateCode;
+        [LoadColumn(2)] public float PassengerCount;
+        [LoadColumn(3)] public float TripTimeInSeconds;
+        [LoadColumn(4)] public float TripDistance;
+        [LoadColumn(5)] public string PaymentType;
+
+        // RegressionExperiment passes "FareAmount" as its label column.
+        [LoadColumn(6)] public float FareAmount;
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTripFarePrediction.cs b/docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTripFarePrediction.cs
@@ -0,0 +1,13 @@
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Prediction output type used with <see cref="TaxiTrip"/> in the regression sample.
+    /// </summary>
+    public class TaxiTripFarePrediction
+    {
+        // Bound to the "Score" column; RegressionExperiment prints it as the predicted fare.
+        [ColumnName("Score")] public float FareAmount;
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj b/docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj
@@ -0,0 +1,38 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp2.2</TargetFramework>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\..\src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <Folder Include="Data\" />
+  </ItemGroup>
+
+  <!-- Data sets are linked from test\data and copied under Data\ in the output directory; the samples open them via relative "Data" paths. -->
+  <ItemGroup>
+    <None Include="..\..\..\test\data\optdigits-test.csv" Link="Data\optdigits-test.csv">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="..\..\..\test\data\optdigits-train.csv" Link="Data\optdigits-train.csv">
 public static string DownloadHousingRegressionDataset() 
 { 
     var fileName = "housing.txt"; 
     if (!File.Exists(fileName)) 
         Download("https://raw.githubusercontent.com/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", fileName); 
     return fileName; 
 } 
 public static string DownloadHousingRegressionDataset() 
 { 
     var fileName = "housing.txt"; 
     if (!File.Exists(fileName)) 
         Download("https://raw.githubusercontent.com/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", fileName); 
     return fileName; 
 } 
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="..\..\..\test\data\taxi-fare-test.csv" Link="Data\taxi-fare-test.csv">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="..\..\..\test\data\taxi-fare-train.csv" Link="Data\taxi-fare-train.csv">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="..\..\..\test\data\wikipedia-detox-250-line-data.tsv" Link="Data\wikipedia-detox-250-line-data.tsv">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+    <None Include="..\..\..\test\data\wikipedia-detox-250-line-test.tsv" Link="Data\wikipedia-detox-250-line-test.tsv">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/MulticlassClassificationExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/MulticlassClassificationExperiment.cs
@@ -0,0 +1,77 @@
+using System;
+using System.IO;
+using System.Linq;
+using Microsoft.ML.Auto;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Sample: runs an AutoML multiclass classification experiment, evaluates the best
+    /// model on a test set, saves it, and scores one hand-written example.
+    /// </summary>
+    public static class MulticlassClassificationExperiment
+    {
+        // Relative folder that holds the data files and receives the saved model.
+        private const string BaseDatasetsLocation = "Data";
+        private static readonly string TrainDataPath = Path.Combine(BaseDatasetsLocation, "optdigits-train.csv");
+        private static readonly string TestDataPath = Path.Combine(BaseDatasetsLocation, "optdigits-test.csv");
+        private static readonly string ModelPath = Path.Combine(BaseDatasetsLocation, "OptDigits.zip");
+
+        // Label column handed to both the experiment and the evaluator; matches PixelData.Number.
+        private const string LabelColumnName = "Number";
+
+        // Passed to CreateMulticlassClassificationExperiment; the progress message reports it as seconds.
+        private const uint ExperimentTime = 60;
+
+        /// <summary>
+        /// Loads the train and test data, runs the experiment, prints metrics, saves the
+        /// best model, predicts one example, and then waits for a key press.
+        /// </summary>
+        public static void Run()
+        {
+            MLContext mlContext = new MLContext();
+
+            // STEP 1: Load data
+            IDataView trainDataView = mlContext.Data.LoadFromTextFile<PixelData>(TrainDataPath, hasHeader: true, separatorChar: ',');
+            IDataView testDataView = mlContext.Data.LoadFromTextFile<PixelData>(TestDataPath, hasHeader: true, separatorChar: ',');
+
+            // STEP 2: Run AutoML experiment
+            Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
+            ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
+                .CreateMulticlassClassificationExperiment(ExperimentTime)
+                .Execute(trainDataView, LabelColumnName);
+
+            // STEP 3: Print metric from the best model
+            RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
+            Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
+            Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
+            Console.WriteLine($"AccuracyMacro of best model from validation data: {bestRun.ValidationMetrics.MacroAccuracy}");
 PrintMetrics(testMetrics); 
 PrintMetrics(testMetrics); 
+
+            // STEP 4: Evaluate test data
+            IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
+            MulticlassClassificationMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
+            Console.WriteLine($"AccuracyMacro of best model on test data: {testMetrics.MacroAccuracy}");
+
+            // STEP 5: Save the best model for later deployment and inferencing
+            using (FileStream fs = File.Create(ModelPath))
+            {
+                mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
+            }
+
+            // STEP 6: Create prediction engine from the best trained model
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<PixelData, PixelPrediction>(bestRun.Model);
+
+            // STEP 7: Initialize new pixel data, and get the predicted number
+            var testPixelData = new PixelData
+            {
+                PixelValues = new float[] { 0, 0, 1, 8, 15, 10, 0, 0, 0, 3, 13, 15, 14, 14, 0, 0, 0, 5, 10, 0, 10, 12, 0, 0, 0, 0, 3, 5, 15, 10, 2, 0, 0, 0, 16, 16, 16, 16, 12, 0, 0, 1, 8, 12, 14, 8, 3, 0, 0, 0, 0, 10, 13, 0, 0, 0, 0, 0, 0, 11, 9, 0, 0, 0 }
+            };
+            var prediction = predictionEngine.Predict(testPixelData);
+            Console.WriteLine($"Predicted number for test pixels: {prediction.Prediction}");
+
+            Console.WriteLine("Press any key to continue...");
+            Console.ReadKey();
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/Program.cs b/docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
@@ -0,0 +1,42 @@
+using System;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Console entry point that runs every AutoML sample in sequence.
+    /// </summary>
+    public class Program
+    {
+        /// <summary>
+        /// Runs the regression, binary classification, and multiclass classification samples
+        /// in that order, clearing the console after each. Any exception is written to the
+        /// console instead of propagating; the method then waits for Enter before returning.
+        /// </summary>
+        public static void Main(string[] args)
+        {
+            try
+            {
+                Action[] samples =
+                {
+                    RegressionExperiment.Run,
+                    BinaryClassificationExperiment.Run,
+                    MulticlassClassificationExperiment.Run
+                };
+
+                foreach (Action runSample in samples)
+                {
+                    runSample();
+                    Console.Clear();
+                }
+
+                Console.WriteLine("Done");
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine($"Exception {ex}");
+            }
+
+            Console.ReadLine();
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs b/docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs
@@ -0,0 +1,83 @@
+using System;
+using System.IO;
+using System.Linq;
+using Microsoft.ML.Auto;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.AutoML.Samples
+{
+    /// <summary>
+    /// Sample: runs an AutoML regression experiment on the taxi fare data, evaluates the
+    /// best model on a test set, saves it, and scores one hand-written trip.
+    /// </summary>
+    public static class RegressionExperiment
+    {
+        // Relative folder that holds the data files and receives the saved model.
+        private const string BaseDatasetsLocation = "Data";
+        private static readonly string TrainDataPath = Path.Combine(BaseDatasetsLocation, "taxi-fare-train.csv");
+        private static readonly string TestDataPath = Path.Combine(BaseDatasetsLocation, "taxi-fare-test.csv");
+        private static readonly string ModelPath = Path.Combine(BaseDatasetsLocation, "TaxiFareModel.zip");
+
+        // Label column handed to both the experiment and the evaluator; matches TaxiTrip.FareAmount.
+        private const string LabelColumnName = "FareAmount";
+
+        // Passed to CreateRegressionExperiment; the progress message reports it as seconds.
+        private const uint ExperimentTime = 60;
+
+        /// <summary>
+        /// Loads the train and test data, runs the experiment, prints metrics, saves the
+        /// best model, predicts one trip's fare, and then waits for a key press.
+        /// </summary>
+        public static void Run()
+        {
+            MLContext mlContext = new MLContext();
+
+            // STEP 1: Load data
+            IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
+            IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');
+
+            // STEP 2: Run AutoML experiment
+            Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
+            ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
+                .CreateRegressionExperiment(ExperimentTime)
+                .Execute(trainDataView, LabelColumnName);
+
+            // STEP 3: Print metric from the best model
+            RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
+            Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
+            Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
+            Console.WriteLine($"RSquared of best model from validation data: {bestRun.ValidationMetrics.RSquared}");
+
+            // STEP 4: Evaluate test data
+            IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
+            RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
+            Console.WriteLine($"RSquared of best model on test data: {testMetrics.RSquared}");
+
+            // STEP 5: Save the best model for later deployment and inferencing
+            using (FileStream fs = File.Create(ModelPath))
+            {
+                mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
+            }
+
+            // STEP 6: Create prediction engine from the best trained model
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);
+
+            // STEP 7: Initialize a new test taxi trip, and get the predicted fare
+            // FareAmount is left unset: it is the label column the model predicts.
+            var testTaxiTrip = new TaxiTrip
+            {
+                VendorId = "VTS",
+                RateCode = 1,
+                PassengerCount = 1,
+                TripTimeInSeconds = 1140,
+                TripDistance = 3.75f,
+                PaymentType = "CRD"
+            };
+            var prediction = predictionEngine.Predict(testTaxiTrip);
+            Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");
+
+            Console.WriteLine("Press any key to continue...");
+            Console.ReadKey();
+        }
+    }
+}