dotnet · OliaG · May 22, 2018 · May 22, 2018 · May 23, 2018 · May 23, 2018
diff --git a/...rted/BinaryClassification_SentimentAnalysis/BinaryClassification_SentimentAnalysis.csproj b/...rted/BinaryClassification_SentimentAnalysis/BinaryClassification_SentimentAnalysis.csproj
@@ -0,0 +1,26 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <!-- Program.cs uses an async Main and inferred tuple element names (C# 7.1 or later),
+         so the language version has to apply to every configuration, not only Debug|AnyCPU. -->
+    <LangVersion>latest</LangVersion>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.ML" Version="0.1.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <Folder Include="Models\" />
+  </ItemGroup>
+
+  <!-- Ship the model archive next to the built application. -->
+  <ItemGroup>
+    <None Update="Models\SentimentModel.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
diff --git a/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/Models/SentimentModel.zip b/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/Models/SentimentModel.zip
diff --git a/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/Program.cs b/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/Program.cs
@@ -0,0 +1,102 @@
+using Microsoft.ML;
+using Microsoft.ML.Models;
+using Microsoft.ML.Runtime.Api;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Transforms;
+using System;
+using System.IO;
+using System.Linq;
+using System.Threading.Tasks;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    internal static class Program
+    {
+        // Directory of the path returned as the first element of Environment.GetCommandLineArgs().
+        private static string AppPath => Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
+
+        // Data set files, four directory levels above AppPath. Every segment is passed separately so that
+        // Path.Combine inserts the platform's own directory separator instead of a hard-coded '\'.
+        private static string TrainDataPath => Path.Combine(AppPath, "..", "..", "..", "..", "datasets", "imdb_labelled.txt");
+        private static string TestDataPath => Path.Combine(AppPath, "..", "..", "..", "..", "datasets", "yelp_labelled.txt");
+
+        // File the trained model is written to by TrainAsync.
+        private static string ModelPath => Path.Combine(AppPath, "Models", "SentimentModel.zip");
+
+        // Entry point: trains the model, prints its evaluation metrics, then prints one prediction
+        // per sample in TestSentimentData.Sentiments and waits for a line of console input.
+        private static async Task Main(string[] args)
+        {
+            var trainedModel = await TrainAsync();
+
+            Evaluate(trainedModel);
+
+            var samples = TestSentimentData.Sentiments;
+            var predicted = trainedModel.Predict(samples);
+
+            // Pair every input sample with the prediction produced for it.
+            foreach (var (input, output) in samples.Zip(predicted, (s, p) => (s, p)))
+            {
+                var verdict = output.Sentiment ? "Positive" : "Negative";
+                Console.WriteLine($"Sentiment: {input.SentimentText} | Prediction: {verdict} sentiment");
+            }
+
+            // Block until a line is read from the console.
+            Console.ReadLine();
+        }
+
+        // Builds the learning pipeline (load -> featurize -> learn), trains the sentiment model,
+        // writes it to ModelPath and returns the trained model.
+        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> TrainAsync()
+        {
+            // LearningPipeline allows us to add steps in order to keep everything together
+            // during the learning process.
+            var pipeline = new LearningPipeline();
+
+            // The TextLoader loads a dataset with comments and corresponding positive or negative sentiment.
+            // When you create a loader you specify the schema by passing a class to the loader containing
+            // all the column names and their types. This will be used to create the model, and train it.
+            pipeline.Add(new TextLoader<SentimentData>(TrainDataPath, useHeader: false, separator: "tab"));
+
+            // TextFeaturizer is a transform that will be used to featurize an input column.
+            // This is used to format and clean the data.
+            pipeline.Add(new TextFeaturizer("Features", "SentimentText"));
+
+            // Add a FastTreeBinaryClassifier, the decision tree learner for this project, and
+            // three hyperparameters to be used for tuning decision tree performance.
+            pipeline.Add(new FastTreeBinaryClassifier() {NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2});
+
+            Console.WriteLine("=============== Training model ===============");
+            // We train our pipeline based on the dataset that has been loaded and transformed.
+            var model = pipeline.Train<SentimentData, SentimentPrediction>();
+
+            // Persist the trained model before reporting completion.
+            await model.WriteAsync(ModelPath);
+
+            Console.WriteLine("=============== End training ===============");
+            Console.WriteLine("The model is saved to {0}", ModelPath);
+
+            return model;
+        }
+
+        // Scores the given model against the test data set and prints the resulting
+        // binary-classification metrics to the console.
+        private static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
+        {
+            // BinaryClassificationEvaluator computes the quality metrics for the PredictionModel
+            // using the specified data set.
+            var metricsEvaluator = new BinaryClassificationEvaluator();
+
+            // NOTE(review): this loader is created with useHeader: true while the training loader in
+            // TrainAsync uses useHeader: false - confirm the test file really starts with a header row.
+            var testSet = new TextLoader<SentimentData>(TestDataPath, useHeader: true, separator: "tab");
+
+            Console.WriteLine("=============== Evaluating model ===============");
+
+            // The returned BinaryClassificationMetrics holds the overall metrics computed by the evaluator.
+            var results = metricsEvaluator.Evaluate(model, testSet);
+
+            // Accuracy: the proportion of correct predictions in the test set.
+            Console.WriteLine($"Accuracy: {results.Accuracy:P2}");
+
+            // Auc: the area under the ROC curve, i.e. the probability that the classifier ranks
+            // a randomly chosen positive instance higher than a randomly chosen negative one
+            // (assuming 'positive' ranks higher than 'negative').
+            Console.WriteLine($"Auc: {results.Auc:P2}");
+
+            // F1Score: the harmonic mean of precision and recall,
+            //  2 * precision * recall / (precision + recall).
+            Console.WriteLine($"F1Score: {results.F1Score:P2}");
+
+            Console.WriteLine("=============== End evaluating ===============");
+            Console.WriteLine();
+        }
+    }
+}
diff --git a/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentData.cs b/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentData.cs
@@ -0,0 +1,11 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    // Input row schema handed to TextLoader<SentimentData> for the sentiment data sets.
+    public class SentimentData
+    {
+        // Text to classify; bound to column "0".
+        [Column("0")]
+        public string SentimentText;
+
+        // Sentiment value; bound to column "1" and exposed under the name "Label".
+        [Column("1", name: "Label")]
+        public float Sentiment;
+    }
+}
diff --git a/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentPrediction.cs b/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/SentimentPrediction.cs
@@ -0,0 +1,9 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    // Output type of the sentiment model.
+    public class SentimentPrediction
+    {
+        // Bound to the "PredictedLabel" column; Program.Main prints true as "Positive"
+        // and false as "Negative".
+        [ColumnName("PredictedLabel")]
+        public bool Sentiment;
+    }
+}
diff --git a/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/TestSentimentData.cs b/Samples/GettingStarted/BinaryClassification_SentimentAnalysis/TestSentimentData.cs
@@ -0,0 +1,26 @@
+using System.Collections.Generic;
+
+namespace BinaryClassification_SentimentAnalysis
+{
+    // Hand-written samples that Program.Main feeds to the trained model.
+    internal class TestSentimentData
+    {
+        // Every sample carries Sentiment = 0 regardless of its text; Program.Main only prints
+        // SentimentText and the model's prediction, never this field.
+        internal static readonly IEnumerable<SentimentData> Sentiments = new[]
+        {
+            new SentimentData { SentimentText = "Contoso's 11 is a wonderful experience", Sentiment = 0 },
+            new SentimentData { SentimentText = "The acting in this movie is very bad", Sentiment = 0 },
+            new SentimentData { SentimentText = "Joe versus the Volcano Coffee Company is a great film.", Sentiment = 0 }
+        };
+    }
+}
diff --git a/Samples/GettingStarted/GettingStarted.sln b/Samples/GettingStarted/GettingStarted.sln
@@ -0,0 +1,37 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.27703.2000
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Regression_TaxiFarePrediction", "Regression_TaxiFarePrediction\Regression_TaxiFarePrediction.csproj", "{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BinaryClassification_SentimentAnalysis", "BinaryClassification_SentimentAnalysis\BinaryClassification_SentimentAnalysis.csproj", "{ED877F56-5304-4F0D-A75C-4C77219C8D0E}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MulticlassClassification_Iris", "MulticlassClassification_Iris\MulticlassClassification_Iris.csproj", "{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C7301D08-10E3-4A51-A70D-7C0BCB39F6E6}.Release|Any CPU.Build.0 = Release|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{ED877F56-5304-4F0D-A75C-4C77219C8D0E}.Release|Any CPU.Build.0 = Release|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{EEC2E07E-7482-4F37-8F7A-135EBDEC75B4}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {B84E804C-06CA-45C8-9B9F-8F69CA930535}
+	EndGlobalSection
+EndGlobal
diff --git a/Samples/GettingStarted/MulticlassClassification_Iris/IrisData.cs b/Samples/GettingStarted/MulticlassClassification_Iris/IrisData.cs
@@ -0,0 +1,17 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace MulticlassClassification_Iris
+{
+    // Input row schema handed to TextLoader<IrisData>: one label followed by four measurements.
+    public class IrisData
+    {
+        // Class label; bound to column "0".
+        [Column("0")]
+        public float Label;
+
+        // Sepal measurements; bound to columns "1" and "2".
+        [Column("1")]
+        public float SepalLength;
+
+        [Column("2")]
+        public float SepalWidth;
+
+        // Petal measurements; bound to columns "3" and "4".
+        [Column("3")]
+        public float PetalLength;
+
+        [Column("4")]
+        public float PetalWidth;
+    }
+}
diff --git a/Samples/GettingStarted/MulticlassClassification_Iris/IrisPrediction.cs b/Samples/GettingStarted/MulticlassClassification_Iris/IrisPrediction.cs
@@ -0,0 +1,9 @@
+using Microsoft.ML.Runtime.Api;
+
+namespace MulticlassClassification_Iris
+{
+    // Output type of the iris model.
+    public class IrisPrediction
+    {
+        // Bound to the "Score" column; Program.Main prints elements 0, 1 and 2 as the
+        // predicted probabilities of type 1, type 2 and type 3.
+        [ColumnName("Score")]
+        public float[] Score;
+    }
+}
diff --git a/Samples/GettingStarted/MulticlassClassification_Iris/Models/IrisModel.zip b/Samples/GettingStarted/MulticlassClassification_Iris/Models/IrisModel.zip
diff --git a/Samples/GettingStarted/MulticlassClassification_Iris/MulticlassClassification_Iris.csproj b/Samples/GettingStarted/MulticlassClassification_Iris/MulticlassClassification_Iris.csproj
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <!-- Program.cs declares an async Main, which requires C# 7.1 or later. -->
+    <LangVersion>latest</LangVersion>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.ML" Version="0.1.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <Folder Include="Models\" />
+  </ItemGroup>
+
+  <!-- Ship the model archive next to the built application. -->
+  <ItemGroup>
+    <None Update="Models\IrisModel.zip">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
diff --git a/Samples/GettingStarted/MulticlassClassification_Iris/Program.cs b/Samples/GettingStarted/MulticlassClassification_Iris/Program.cs
@@ -0,0 +1,103 @@
+using System;
+using System.IO;
+using Microsoft.ML.Models;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Transforms;
+using Microsoft.ML;
+using System.Threading.Tasks;
+
+namespace MulticlassClassification_Iris
+{
+    public static partial class Program
+    {
+        // Directory of the path returned as the first element of Environment.GetCommandLineArgs().
+        private static string AppPath => Path.GetDirectoryName(Environment.GetCommandLineArgs()[0]);
+
+        // Data set files, four directory levels above AppPath. Every segment is passed separately so that
+        // Path.Combine inserts the platform's own directory separator instead of a hard-coded '\'.
+        private static string TrainDataPath => Path.Combine(AppPath, "..", "..", "..", "..", "datasets", "iris_train.txt");
+        private static string TestDataPath => Path.Combine(AppPath, "..", "..", "..", "..", "datasets", "iris_test.txt");
+
+        // File the trained model is written to by TrainAsync.
+        private static string ModelPath => Path.Combine(AppPath, "Models", "IrisModel.zip");
+
+        // Entry point: trains the model, prints its evaluation metrics, then prints the predicted
+        // per-type scores for the three samples in TestIrisData and waits for a line of console input.
+        private static async Task Main(string[] args)
+        {
+            var model = await TrainAsync();
+
+            Evaluate(model);
+
+            Console.WriteLine();
+            PrintScores("type 1", model.Predict(TestIrisData.Iris1));
+            Console.WriteLine();
+
+            PrintScores("type 3", model.Predict(TestIrisData.Iris2));
+            Console.WriteLine();
+
+            PrintScores("type 2", model.Predict(TestIrisData.Iris3));
+
+            Console.ReadLine();
+
+            // Prints the three scores of one prediction. Keeping the labels in a single place
+            // guarantees Score[0], Score[1] and Score[2] are always reported as type 1, 2 and 3.
+            void PrintScores(string actualType, IrisPrediction prediction)
+            {
+                Console.WriteLine($"Actual: {actualType}.     Predicted probability: type 1: {prediction.Score[0]:0.####}");
+                Console.WriteLine($"                                           type 2: {prediction.Score[1]:0.####}");
+                Console.WriteLine($"                                           type 3: {prediction.Score[2]:0.####}");
+            }
+        }
+
+        // Builds the learning pipeline (load -> concatenate features -> learn), trains the iris model,
+        // writes it to ModelPath and returns the trained model.
+        internal static async Task<PredictionModel<IrisData, IrisPrediction>> TrainAsync()
+        {
+            var learningPipeline = new LearningPipeline();
+
+            // Load the training rows using the IrisData schema.
+            learningPipeline.Add(new TextLoader<IrisData>(TrainDataPath, useHeader: false));
+
+            // Combine the four measurement columns under the "Features" name.
+            learningPipeline.Add(
+                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
+
+            // Learner used for this sample.
+            learningPipeline.Add(new StochasticDualCoordinateAscentClassifier());
+
+            Console.WriteLine("=============== Training model ===============");
+
+            var trainedModel = learningPipeline.Train<IrisData, IrisPrediction>();
+
+            // Persist the trained model before reporting completion.
+            await trainedModel.WriteAsync(ModelPath);
+
+            Console.WriteLine("=============== End training ===============");
+            Console.WriteLine("The model is saved to {0}", ModelPath);
+
+            return trainedModel;
+        }
+
+        // Scores the given model against the test data set and prints the classification metrics,
+        // the per-class log-loss values and the confusion matrix to the console.
+        private static void Evaluate(PredictionModel<IrisData, IrisPrediction> model)
+        {
+            var testSet = new TextLoader<IrisData>(TestDataPath, useHeader: false);
+
+            var metricsEvaluator = new ClassificationEvaluator {OutputTopKAcc = 3};
+
+            Console.WriteLine("=============== Evaluating model ===============");
+
+            var results = metricsEvaluator.Evaluate(model, testSet);
+
+            Console.WriteLine("Metrics:");
+            Console.WriteLine($"    AccuracyMacro = {results.AccuracyMacro:0.####}, a value between 0 and 1, the closer to 1, the better");
+            Console.WriteLine($"    AccuracyMicro = {results.AccuracyMicro:0.####}, a value between 0 and 1, the closer to 1, the better");
+            Console.WriteLine($"    LogLoss = {results.LogLoss:0.####}, the closer to 0, the better");
+            Console.WriteLine($"    LogLoss for class 1 = {results.PerClassLogLoss[0]:0.####}, the closer to 0, the better");
+            Console.WriteLine($"    LogLoss for class 2 = {results.PerClassLogLoss[1]:0.####}, the closer to 0, the better");
+            Console.WriteLine($"    LogLoss for class 3 = {results.PerClassLogLoss[2]:0.####}, the closer to 0, the better");
+            Console.WriteLine();
+            Console.WriteLine("    ConfusionMatrix:");
+
+            // One output line per matrix row; every cell is wrapped in tabs.
+            var confusion = results.ConfusionMatrix;
+            for (var row = 0; row < confusion.Order; row++)
+            {
+                for (var column = 0; column < confusion.ClassNames.Count; column++)
+                {
+                    Console.Write($"\t{confusion[row, column]}\t");
+                }
+
+                Console.WriteLine();
+            }
+
+            Console.WriteLine("=============== End evaluating ===============");
+            Console.WriteLine();
+        }
+    }
+}