dotnet · Zruty0 · Aug 24, 2018 · Aug 13, 2018 · Aug 13, 2018 · Aug 13, 2018
diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
@@ -0,0 +1,96 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Running;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Api;
+using Microsoft.ML.Runtime.CommandLine;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Learners;
+
+namespace Microsoft.ML.Benchmarks
+{
+    /// <summary>Benchmarks training KMeans followed by logistic regression on the "adult.train" data.</summary>
+    public class KMeansAndLogisticRegressionBench
+    {
+        private static string s_dataPath;
+
+        [GlobalSetup]
+        public void Setup()
+        {
+            s_dataPath = Program.GetDataPath("adult.train");
+        }
+
+        [Benchmark]
+        public IPredictor TrainKMeansAndLR() => TrainKMeansAndLRCore();
+
+        /// <summary>Featurizes the data, appends the KMeans "Score" column to "Features", trains logistic regression.</summary>
+        private static IPredictor TrainKMeansAndLRCore()
+        {
+            using (var env = new TlcEnvironment(seed: 1))
+            {
+                // Schema of the comma-separated input file; the first line is a header.
+                var loaderArgs = new TextLoader.Arguments
+                {
+                    HasHeader = true,
+                    Separator = ",",
+                    Column = new[]
+                    {
+                        new TextLoader.Column
+                        {
+                            Name = "Label",
+                            Source = new[] { new TextLoader.Range { Min = 14, Max = 14 } },
+                            Type = DataKind.R4
+                        },
+                        new TextLoader.Column
+                        {
+                            Name = "CatFeatures",
+                            Source = new[]
+                            {
+                                new TextLoader.Range { Min = 1, Max = 1 }, new TextLoader.Range { Min = 3, Max = 3 },
+                                new TextLoader.Range { Min = 5, Max = 9 }, new TextLoader.Range { Min = 13, Max = 13 }
+                            },
+                            Type = DataKind.TX
+                        },
+                        new TextLoader.Column
+                        {
+                            Name = "NumFeatures",
+                            Source = new[]
+                            {
+                                new TextLoader.Range { Min = 0, Max = 0 }, new TextLoader.Range { Min = 2, Max = 2 },
+                                new TextLoader.Range { Min = 4, Max = 4 }, new TextLoader.Range { Min = 10, Max = 12 }
+                            },
+                            Type = DataKind.R4
+                        }
+                    }
+                };
+                var loader = new TextLoader(env, loaderArgs, new MultiFileSource(s_dataPath));
+
+                // Run CategoricalTransform on the text columns, min-max normalize the numeric ones, then merge both.
+                var catArgs = new CategoricalTransform.Arguments
+                {
+                    Column = new[] { new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" } }
+                };
+                IDataTransform categorical = CategoricalTransform.Create(env, catArgs, loader);
+                IDataTransform normalized = NormalizeTransform.CreateMinMaxNormalizer(env, categorical, "NumFeatures");
+                IDataTransform features = new ConcatTransform(env, normalized, "Features", "NumFeatures", "CatFeatures");
+
+                // Train and score the "KMeans" trainer (settings "k=100"), then append its "Score" column to "Features".
+                var kMeansArgs = new TrainAndScoreTransform.Arguments
+                {
+                    Trainer = new SubComponent<ITrainer, SignatureTrainer>("KMeans", "k=100"),
+                    FeatureColumn = "Features"
+                };
+                IDataTransform scored = TrainAndScoreTransform.Create(env, kMeansArgs, features);
+                IDataTransform augmented = new ConcatTransform(env, scored, "Features", "Features", "Score");
+
+                // Train logistic regression on the combined "Features" column.
+                var trainer = new LogisticRegression(env, new LogisticRegression.Arguments { EnforceNonNegativity = true, OptTol = 1e-3f });
+                var trainData = new RoleMappedData(augmented, label: "Label", feature: "Features");
+                return trainer.Train(trainData);
+            }
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
@@ -13,6 +13,7 @@
     <PackageReference Include="BenchmarkDotNet" Version="$(BenchmarkDotNetVersion)" />
   </ItemGroup>
   <ItemGroup>
+    <ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.ML\Microsoft.ML.csproj" />
   </ItemGroup>

diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -4,9 +4,11 @@
 
 using BenchmarkDotNet.Attributes;
 using BenchmarkDotNet.Engines;
-using Microsoft.ML.Data;
 using Microsoft.ML.Models;
+using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Api;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Learners;
 using Microsoft.ML.Trainers;
 using Microsoft.ML.Transforms;
 using System;
@@ -19,6 +21,7 @@ public class StochasticDualCoordinateAscentClassifierBench
         internal static ClassificationMetrics s_metrics;
         private static PredictionModel<IrisData, IrisPrediction> s_trainedModel;
         private static string s_dataPath;
+        private static string s_sentimentDataPath;
         private static IrisData[][] s_batches;
         private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 };
         private readonly Random r = new Random(0);
@@ -35,10 +38,11 @@ public class StochasticDualCoordinateAscentClassifierBench
         public void Setup()
         {
             s_dataPath = Program.GetDataPath("iris.txt");
+            s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv");
             s_trainedModel = TrainCore();
             IrisPrediction prediction = s_trainedModel.Predict(s_example);
 
-            var testData = new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
+            var testData = new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
             var evaluator = new ClassificationEvaluator();
             s_metrics = evaluator.Evaluate(s_trainedModel, testData);
 
@@ -69,6 +73,9 @@ public void Setup()
         [Benchmark]
         public void PredictIrisBatchOf5() => Consume(s_trainedModel.Predict(s_batches[2]));
 
+        [Benchmark]
+        public IPredictor TrainSentiment() => TrainSentimentCore(); // Benchmark entry point; the pipeline itself is built in TrainSentimentCore().
+
         private void Consume(IEnumerable<IrisPrediction> predictions)
         {
             foreach (var prediction in predictions)
@@ -79,7 +86,7 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
         {
             var pipeline = new LearningPipeline();
 
-            pipeline.Add(new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
+            pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
             pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                 "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
 
@@ -89,6 +96,76 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
             return model;
         }
 
+        /// <summary>
+        /// Loads the sentiment data, turns the text into word-embedding features and trains an SDCA multi-class model.
+        /// </summary>
+        private static IPredictor TrainSentimentCore()
+        {
+            using (var env = new TlcEnvironment(seed: 1))
+            {
+                // Tab-separated input with a header row: column 0 is the numeric label, column 1 the text.
+                var loaderArgs = new TextLoader.Arguments
+                {
+                    AllowQuoting = false,
+                    AllowSparse = false,
+                    Separator = "tab",
+                    HasHeader = true,
+                    Column = new[]
+                    {
+                        new TextLoader.Column
+                        {
+                            Name = "Label",
+                            Source = new[] { new TextLoader.Range { Min = 0, Max = 0 } },
+                            Type = DataKind.Num
+                        },
+                        new TextLoader.Column
+                        {
+                            Name = "SentimentText",
+                            Source = new[] { new TextLoader.Range { Min = 1, Max = 1 } },
+                            Type = DataKind.Text
+                        }
+                    }
+                };
+                var loader = new TextLoader(env, loaderArgs, new MultiFileSource(s_sentimentDataPath));
+
+                // Text normalization only: tokens are output, char and word feature extractors are switched off (null).
+                var textArgs = new TextTransform.Arguments
+                {
+                    Column = new TextTransform.Column
+                    {
+                        Name = "WordEmbeddings",
+                        Source = new[] { "SentimentText" }
+                    },
+                    KeepDiacritics = false,
+                    KeepPunctuations = false,
+                    TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
+                    OutputTokens = true,
+                    StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
+                    VectorNormalizer = TextTransform.TextNormKind.None,
+                    CharFeatureExtractor = null,
+                    WordFeatureExtractor = null,
+                };
+                var normalizedText = TextTransform.Create(env, textArgs, loader);
+
+                // NOTE(review): "WordEmbeddings_TransformedText" is presumably the token column from OutputTokens - confirm.
+                var embeddingArgs = new WordEmbeddingsTransform.Arguments
+                {
+                    Column = new[]
+                    {
+                        new WordEmbeddingsTransform.Column { Name = "Features", Source = "WordEmbeddings_TransformedText" }
+                    },
+                    ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
+                };
+                var embedded = new WordEmbeddingsTransform(env, embeddingArgs, normalizedText);
+
+                // Train the SDCA multi-class trainer on the embedding features.
+                var sdcaArgs = new SdcaMultiClassTrainer.Arguments { MaxIterations = 20 };
+                var trainer = new SdcaMultiClassTrainer(env, sdcaArgs);
+                var trainData = new RoleMappedData(embedded, label: "Label", feature: "Features");
+                return trainer.Train(trainData);
+            }
+        }
+
         public class IrisData
         {
             [Column("0")]