From b0f3d2c7ec35129bb0d70f25e61817bd966f5c60 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Mon, 13 Aug 2018 09:58:44 -0700 Subject: [PATCH 1/6] Add sentiment test with SDCA. --- .../SentimentPredictionTests.cs | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs index e42de17090..9cb0c70eb1 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.FastTree; using Microsoft.ML.Runtime.Internal.Calibration; +using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Runtime.Model; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; @@ -171,6 +172,77 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE Assert.Equal(1.0, (double)summary[0].Value, 1); } } + + [Fact] + public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordEmbeddingForBenchmarking() + { + var dataPath = GetDataPath(SentimentDataPath); + var testDataPath = GetDataPath(SentimentTestPath); + + using (var env = new TlcEnvironment(seed: 1)) + { + // Pipeline + var loader = new TextLoader(env, + new TextLoader.Arguments() + { + Separator = "tab", + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min=0, Max=0} }, + Type = DataKind.Num + }, + + new TextLoader.Column() + { + Name = "SentimentText", + Source = new [] { new TextLoader.Range() { Min=1, Max=1} }, + Type = DataKind.Text + } + } + }, new MultiFileSource(dataPath)); + + var text = TextTransform.Create(env, new TextTransform.Arguments() + { + Column = new TextTransform.Column + { + Name = "WordEmbeddings", + Source = new[] { "SentimentText" } + }, + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(), + VectorNormalizer = TextTransform.TextNormKind.None, + CharFeatureExtractor = null, + WordFeatureExtractor = null, + }, + loader); + + var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() + { + Column = new WordEmbeddingsTransform.Column[1] + { + new WordEmbeddingsTransform.Column + { + Name = "Features", + Source = "WordEmbeddings_TransformedText" + } + }, + ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe, + }, text); + // Train + var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()); + + var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); + var pred = trainer.Train(trainRoles); + } + } + private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData) { var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true); From 3f48dd45d4a71074e8c12ce42d3be493c1fb85d3 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Mon, 13 Aug 2018 11:41:23 -0700 Subject: [PATCH 2/6] Add a test with KMeans and LR. --- ...cRegressionWithClusteringFeaturizerTest.cs | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs new file mode 100644 index 0000000000..79c5115da9 --- /dev/null +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Learners; +using Xunit; + +namespace Microsoft.ML.Scenarios +{ + public partial class ScenariosTests + { + [Fact] + public void KMeansAndLRTest() + { + string dataPath = GetDataPath("adult.train"); + + using (var env = new TlcEnvironment(seed: 1, conc: 1)) + { + // Pipeline + var loader = new TextLoader(env, + new TextLoader.Arguments() + { + HasHeader = true, + Separator = ",", + Column = new[] { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min = 14, Max = 14} }, + Type = DataKind.R4 + }, + new TextLoader.Column() + { + Name = "CatFeatures", + Source = new [] { + new TextLoader.Range() { Min = 1, Max = 1 }, + new TextLoader.Range() { Min = 3, Max = 3 }, + new TextLoader.Range() { Min = 5, Max = 9 }, + new TextLoader.Range() { Min = 13, Max = 13 } + }, + Type = DataKind.R4 + }, + new TextLoader.Column() + { + Name = "NumFeatures", + Source = new [] { + new TextLoader.Range() { Min = 0, Max = 0 }, + new TextLoader.Range() { Min = 2, Max = 2 }, + new TextLoader.Range() { Min = 4, Max = 4 }, + new TextLoader.Range() { Min = 10, Max = 12 } + }, + Type = DataKind.R4 + } + } + }, new MultiFileSource(dataPath)); + + IDataTransform trans = CategoricalTransform.Create(env, new CategoricalTransform.Arguments + { + Column = new [] + { + new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" } + } + }, loader); + + trans = NormalizeTransform.CreateMinMaxNormalizer(env, trans, "NumFeatures"); + trans = new ConcatTransform(env, trans, "Features", "NumFeatures", "CatFeatures"); + trans = TrainAndScoreTransform.Create(env, new TrainAndScoreTransform.Arguments + { + Trainer = new SubComponent("KMeans", "k=300"), + FeatureColumn = "Features" + }, trans); + trans = new ConcatTransform(env, trans, "Features", "Features", "Score"); + + // Train + var trainer = new LogisticRegression(env, new LogisticRegression.Arguments() { EnforceNonNegativity = true }); + } + } + } +} From 7e9651aa52f23e3f98180607b3eba6077d2ccefd Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Mon, 13 Aug 2018 13:11:03 -0700 Subject: [PATCH 3/6] Add LR training to test --- .../LogisticRegressionWithClusteringFeaturizerTest.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs index 79c5115da9..0057342dbb 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs @@ -69,13 +69,15 @@ public void KMeansAndLRTest() trans = new ConcatTransform(env, trans, "Features", "NumFeatures", "CatFeatures"); trans = TrainAndScoreTransform.Create(env, new TrainAndScoreTransform.Arguments { - Trainer = new SubComponent("KMeans", "k=300"), + Trainer = new SubComponent("KMeans", "k=200"), FeatureColumn = "Features" }, trans); trans = new ConcatTransform(env, trans, "Features", "Features", "Score"); // Train - var trainer = new LogisticRegression(env, new LogisticRegression.Arguments() { EnforceNonNegativity = true }); + var trainer = new LogisticRegression(env, new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f }); + var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); + var pred = trainer.Train(trainRoles); } } } From edaa1604bb2d233b2daab1d1db3047e49624efd6 Mon Sep 17 00:00:00 2001 From: Yael Dekel Date: Wed, 15 Aug 2018 11:35:18 -0700 Subject: [PATCH 4/6] Convert unit tests to benchmark tests, with merge conflicts fixed. --- .../Models/ClassificationMetrics.cs | 1 + .../KMeansAndLogisticRegressionBench.cs} | 61 +++++++++++--- .../Microsoft.ML.Benchmarks.csproj | 1 + ...sticDualCoordinateAscentClassifierBench.cs | 82 ++++++++++++++++++- .../SentimentPredictionTests.cs | 70 ---------------- 5 files changed, 130 insertions(+), 85 deletions(-) rename test/{Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs => Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs} (68%) diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index 6c1c139278..d7bde3dd08 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -15,6 +15,7 @@ namespace Microsoft.ML.Models /// public sealed class ClassificationMetrics { + public static ClassificationMetrics Empty = new ClassificationMetrics(); private ClassificationMetrics() { } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs similarity index 68% rename from test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs rename to test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index 0057342dbb..1cf302c265 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/LogisticRegressionWithClusteringFeaturizerTest.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -2,22 +2,35 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Running; using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Learners; -using Xunit; -namespace Microsoft.ML.Scenarios +namespace Microsoft.ML.Benchmarks { - public partial class ScenariosTests + public class KMeansAndLogisticRegressionBench { - [Fact] - public void KMeansAndLRTest() + private static string s_dataPath; + + [Benchmark] + public IPredictor TrainKMeansAndLR() => TrainKMeansAndLRCore(); + + [GlobalSetup] + public void Setup() + { + s_dataPath = Program.GetDataPath("adult.train"); + StochasticDualCoordinateAscentClassifierBench.s_metrics = Models.ClassificationMetrics.Empty; + } + + private static IPredictor TrainKMeansAndLRCore() { - string dataPath = GetDataPath("adult.train"); + string dataPath = s_dataPath; - using (var env = new TlcEnvironment(seed: 1, conc: 1)) + using (var env = new TlcEnvironment(seed: 1)) { // Pipeline var loader = new TextLoader(env, @@ -41,7 +54,7 @@ public void KMeansAndLRTest() new TextLoader.Range() { Min = 5, Max = 9 }, new TextLoader.Range() { Min = 13, Max = 13 } }, - Type = DataKind.R4 + Type = DataKind.TX }, new TextLoader.Column() { @@ -59,7 +72,7 @@ public void KMeansAndLRTest() IDataTransform trans = CategoricalTransform.Create(env, new CategoricalTransform.Arguments { - Column = new [] + Column = new[] { new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" } } @@ -69,16 +82,40 @@ public void KMeansAndLRTest() trans = new ConcatTransform(env, trans, "Features", "NumFeatures", "CatFeatures"); trans = TrainAndScoreTransform.Create(env, new TrainAndScoreTransform.Arguments { - Trainer = new SubComponent("KMeans", "k=200"), - FeatureColumn = "Features" + Trainer = new SubComponent("KMeans", "k=100"), + FeatureColumn = "Features" }, trans); trans = new ConcatTransform(env, trans, "Features", "Features", "Score"); // Train var trainer = new LogisticRegression(env, new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - var pred = trainer.Train(trainRoles); + return trainer.Train(trainRoles); } } + + public class IrisData + { + [Column("0")] + public float Label; + + [Column("1")] + public float SepalLength; + + [Column("2")] + public float SepalWidth; + + [Column("3")] + public float PetalLength; + + [Column("4")] + public float PetalWidth; + } + + public class IrisPrediction + { + [ColumnName("Score")] + public float[] PredictedLabels; + } } } diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 7f0af862e0..5a9f3e7467 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -13,6 +13,7 @@ + diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index a01cf8f613..46c1cf0f93 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -4,9 +4,11 @@ using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Engines; -using Microsoft.ML.Data; using Microsoft.ML.Models; +using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System; @@ -19,6 +21,7 @@ public class StochasticDualCoordinateAscentClassifierBench internal static ClassificationMetrics s_metrics; private static PredictionModel s_trainedModel; private static string s_dataPath; + private static string s_sentimentDataPath; private static IrisData[][] s_batches; private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 }; private readonly Random r = new Random(0); @@ -35,10 +38,11 @@ public class StochasticDualCoordinateAscentClassifierBench public void Setup() { s_dataPath = Program.GetDataPath("iris.txt"); + s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); s_trainedModel = TrainCore(); IrisPrediction prediction = s_trainedModel.Predict(s_example); - var testData = new TextLoader(s_dataPath).CreateFrom(useHeader: true); + var testData = new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true); var evaluator = new ClassificationEvaluator(); s_metrics = evaluator.Evaluate(s_trainedModel, testData); @@ -69,6 +73,9 @@ public void Setup() [Benchmark] public void PredictIrisBatchOf5() => Consume(s_trainedModel.Predict(s_batches[2])); + [Benchmark] + public IPredictor TrainSentiment() => TrainSentimentCore(); + private void Consume(IEnumerable predictions) { foreach (var prediction in predictions) @@ -79,7 +86,7 @@ private static PredictionModel TrainCore() { var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(s_dataPath).CreateFrom(useHeader: true)); + pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true)); pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); @@ -89,6 +96,75 @@ private static PredictionModel TrainCore() return model; } + private static IPredictor TrainSentimentCore() + { + var dataPath = s_sentimentDataPath; + using (var env = new TlcEnvironment(seed: 1)) + { + // Pipeline + var loader = new TextLoader(env, + new TextLoader.Arguments() + { + AllowQuoting = false, + AllowSparse = false, + Separator = "tab", + HasHeader = true, + Column = new[] + { + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min=0, Max=0} }, + Type = DataKind.Num + }, + + new TextLoader.Column() + { + Name = "SentimentText", + Source = new [] { new TextLoader.Range() { Min=1, Max=1} }, + Type = DataKind.Text + } + } + }, new MultiFileSource(dataPath)); + + var text = TextTransform.Create(env, new TextTransform.Arguments() + { + Column = new TextTransform.Column + { + Name = "WordEmbeddings", + Source = new[] { "SentimentText" } + }, + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(), + VectorNormalizer = TextTransform.TextNormKind.None, + CharFeatureExtractor = null, + WordFeatureExtractor = null, + }, + loader); + + var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() + { + Column = new WordEmbeddingsTransform.Column[1] + { + new WordEmbeddingsTransform.Column + { + Name = "Features", + Source = "WordEmbeddings_TransformedText" + } + }, + ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe, + }, text); + // Train + var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 }); + + var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); + return trainer.Train(trainRoles); + } + } + public class IrisData { [Column("0")] diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs index 9cb0c70eb1..c51368aeb2 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs @@ -173,76 +173,6 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE } } - [Fact] - public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordEmbeddingForBenchmarking() - { - var dataPath = GetDataPath(SentimentDataPath); - var testDataPath = GetDataPath(SentimentTestPath); - - using (var env = new TlcEnvironment(seed: 1)) - { - // Pipeline - var loader = new TextLoader(env, - new TextLoader.Arguments() - { - Separator = "tab", - HasHeader = true, - Column = new[] - { - new TextLoader.Column() - { - Name = "Label", - Source = new [] { new TextLoader.Range() { Min=0, Max=0} }, - Type = DataKind.Num - }, - - new TextLoader.Column() - { - Name = "SentimentText", - Source = new [] { new TextLoader.Range() { Min=1, Max=1} }, - Type = DataKind.Text - } - } - }, new MultiFileSource(dataPath)); - - var text = TextTransform.Create(env, new TextTransform.Arguments() - { - Column = new TextTransform.Column - { - Name = "WordEmbeddings", - Source = new[] { "SentimentText" } - }, - KeepDiacritics = false, - KeepPunctuations = false, - TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower, - OutputTokens = true, - StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(), - VectorNormalizer = TextTransform.TextNormKind.None, - CharFeatureExtractor = null, - WordFeatureExtractor = null, - }, - loader); - - var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() - { - Column = new WordEmbeddingsTransform.Column[1] - { - new WordEmbeddingsTransform.Column - { - Name = "Features", - Source = "WordEmbeddings_TransformedText" - } - }, - ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe, - }, text); - // Train - var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()); - - var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - var pred = trainer.Train(trainRoles); - } - } - private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData) { var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true); From 6fa909e445741d4f7b66cf17f2801d566b21bfc3 Mon Sep 17 00:00:00 2001 From: Brian Lui Date: Thu, 23 Aug 2018 13:08:31 -0700 Subject: [PATCH 5/6] Respond to PR feedback --- .../Models/ClassificationMetrics.cs | 1 - .../KMeansAndLogisticRegressionBench.cs | 25 ----- ...sticDualCoordinateAscentClassifierBench.cs | 97 ++++++++++--------- .../SentimentPredictionTests.cs | 6 -- 4 files changed, 49 insertions(+), 80 deletions(-) diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index d7bde3dd08..6c1c139278 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -15,7 +15,6 @@ namespace Microsoft.ML.Models /// public sealed class ClassificationMetrics { - public static ClassificationMetrics Empty = new ClassificationMetrics(); private ClassificationMetrics() { } diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index 1cf302c265..f13bb7260e 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -23,7 +23,6 @@ public class KMeansAndLogisticRegressionBench public void Setup() { s_dataPath = Program.GetDataPath("adult.train"); - StochasticDualCoordinateAscentClassifierBench.s_metrics = Models.ClassificationMetrics.Empty; } private static IPredictor TrainKMeansAndLRCore() @@ -93,29 +92,5 @@ private static IPredictor TrainKMeansAndLRCore() return trainer.Train(trainRoles); } } - - public class IrisData - { - [Column("0")] - public float Label; - - [Column("1")] - public float SepalLength; - - [Column("2")] - public float SepalWidth; - - [Column("3")] - public float PetalLength; - - [Column("4")] - public float PetalWidth; - } - - public class IrisPrediction - { - [ColumnName("Score")] - public float[] PredictedLabels; - } } } diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 46c1cf0f93..ce8cb1aad0 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -103,63 +103,64 @@ private static IPredictor TrainSentimentCore() { // Pipeline var loader = new TextLoader(env, - new TextLoader.Arguments() - { - AllowQuoting = false, - AllowSparse = false, - Separator = "tab", - HasHeader = true, - Column = new[] + new TextLoader.Arguments() { - new TextLoader.Column() - { - Name = "Label", - Source = new [] { new TextLoader.Range() { Min=0, Max=0} }, - Type = DataKind.Num - }, - - new TextLoader.Column() + AllowQuoting = false, + AllowSparse = false, + Separator = "tab", + HasHeader = true, + Column = new[] { - Name = "SentimentText", - Source = new [] { new TextLoader.Range() { Min=1, Max=1} }, - Type = DataKind.Text + new TextLoader.Column() + { + Name = "Label", + Source = new [] { new TextLoader.Range() { Min=0, Max=0} }, + Type = DataKind.Num + }, + + new TextLoader.Column() + { + Name = "SentimentText", + Source = new [] { new TextLoader.Range() { Min=1, Max=1} }, + Type = DataKind.Text + } } - } - }, new MultiFileSource(dataPath)); + }, new MultiFileSource(dataPath)); - var text = TextTransform.Create(env, new TextTransform.Arguments() - { - Column = new TextTransform.Column + var text = TextTransform.Create(env, + new TextTransform.Arguments() { - Name = "WordEmbeddings", - Source = new[] { "SentimentText" } - }, - KeepDiacritics = false, - KeepPunctuations = false, - TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower, - OutputTokens = true, - StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(), - VectorNormalizer = TextTransform.TextNormKind.None, - CharFeatureExtractor = null, - WordFeatureExtractor = null, - }, - loader); - - var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() - { - Column = new WordEmbeddingsTransform.Column[1] + Column = new TextTransform.Column + { + Name = "WordEmbeddings", + Source = new[] { "SentimentText" } + }, + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(), + VectorNormalizer = TextTransform.TextNormKind.None, + CharFeatureExtractor = null, + WordFeatureExtractor = null, + }, loader); + + var trans = new WordEmbeddingsTransform(env, + new WordEmbeddingsTransform.Arguments() { - new WordEmbeddingsTransform.Column + Column = new WordEmbeddingsTransform.Column[1] { - Name = "Features", - Source = "WordEmbeddings_TransformedText" - } - }, - ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe, - }, text); + new WordEmbeddingsTransform.Column + { + Name = "Features", + Source = "WordEmbeddings_TransformedText" + } + }, + ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe, + }, text); + // Train var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 }); - var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); return trainer.Train(trainRoles); } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs index c51368aeb2..2deeb7b4ce 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs @@ -8,12 +8,6 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.FastTree; using Microsoft.ML.Runtime.Internal.Calibration; -using Microsoft.ML.Runtime.Learners; -using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; -using System.Collections.Generic; -using System.IO; using System.Linq; using Xunit; From d749c8c0746d8951f2209727920ccbb599558ee3 Mon Sep 17 00:00:00 2001 From: Brian Lui Date: Thu, 23 Aug 2018 13:59:40 -0700 Subject: [PATCH 6/6] Respond to PR feedback: Revert changes to SentimentPredictionTests.cs --- .../SentimentPredictionTests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs index 2deeb7b4ce..e42de17090 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs @@ -8,6 +8,11 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.FastTree; using Microsoft.ML.Runtime.Internal.Calibration; +using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; +using System.Collections.Generic; +using System.IO; using System.Linq; using Xunit; @@ -166,7 +171,6 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE Assert.Equal(1.0, (double)summary[0].Value, 1); } } - private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData) { var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true);