Skip to content

Add new benchmarks to test\Microsoft.ML.Benchmarks #722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;

namespace Microsoft.ML.Benchmarks
{
/// <summary>
/// Benchmark that loads the "adult.train" data, runs a KMeans trainer (k=100) over the assembled
/// feature vector, appends the resulting "Score" column to the features, and finally trains a
/// logistic regression predictor on the combined columns.
/// </summary>
public class KMeansAndLogisticRegressionBench
{
    // Location of the training file; assigned in Setup.
    private static string s_dataPath;

    /// <summary>Resolves the path of the training data via Program.GetDataPath.</summary>
    [GlobalSetup]
    public void Setup()
    {
        s_dataPath = Program.GetDataPath("adult.train");
    }

    /// <summary>Benchmarked entry point; simply forwards to <see cref="TrainKMeansAndLRCore"/>.</summary>
    /// <returns>The predictor produced by the logistic regression trainer.</returns>
    [Benchmark]
    public IPredictor TrainKMeansAndLR()
    {
        return TrainKMeansAndLRCore();
    }

    // Creates a TextLoader.Range with the given Min and Max source column indices.
    private static TextLoader.Range ColumnRange(int min, int max)
    {
        return new TextLoader.Range() { Min = min, Max = max };
    }

    /// <summary>
    /// Builds the whole data pipeline and trains the final logistic regression model.
    /// </summary>
    /// <returns>The trained predictor.</returns>
    private static IPredictor TrainKMeansAndLRCore()
    {
        string trainingFile = s_dataPath;

        using (var environment = new TlcEnvironment(seed: 1))
        {
            // Column layout of the comma-separated input file (which has a header row).
            var labelColumn = new TextLoader.Column()
            {
                Name = "Label",
                Source = new[] { ColumnRange(14, 14) },
                Type = DataKind.R4
            };
            var categoricalColumn = new TextLoader.Column()
            {
                Name = "CatFeatures",
                Source = new[]
                {
                    ColumnRange(1, 1),
                    ColumnRange(3, 3),
                    ColumnRange(5, 9),
                    ColumnRange(13, 13)
                },
                Type = DataKind.TX
            };
            var numericColumn = new TextLoader.Column()
            {
                Name = "NumFeatures",
                Source = new[]
                {
                    ColumnRange(0, 0),
                    ColumnRange(2, 2),
                    ColumnRange(4, 4),
                    ColumnRange(10, 12)
                },
                Type = DataKind.R4
            };

            var loaderArguments = new TextLoader.Arguments()
            {
                HasHeader = true,
                Separator = ",",
                Column = new[] { labelColumn, categoricalColumn, numericColumn }
            };
            var source = new TextLoader(environment, loaderArguments, new MultiFileSource(trainingFile));

            // Stage 1: apply the categorical transform to "CatFeatures" in place.
            var categoricalArguments = new CategoricalTransform.Arguments
            {
                Column = new[]
                {
                    new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" }
                }
            };
            IDataTransform encoded = CategoricalTransform.Create(environment, categoricalArguments, source);

            // Stage 2: min-max normalize the numeric columns, then merge both groups into "Features".
            IDataTransform normalized = NormalizeTransform.CreateMinMaxNormalizer(environment, encoded, "NumFeatures");
            IDataTransform combined = new ConcatTransform(environment, normalized, "Features", "NumFeatures", "CatFeatures");

            // Stage 3: train and score KMeans on "Features", then fold its "Score" column back in.
            var clusteringArguments = new TrainAndScoreTransform.Arguments
            {
                Trainer = new SubComponent<ITrainer, SignatureTrainer>("KMeans", "k=100"),
                FeatureColumn = "Features"
            };
            IDataTransform clustered = TrainAndScoreTransform.Create(environment, clusteringArguments, combined);
            IDataTransform featuresWithScore = new ConcatTransform(environment, clustered, "Features", "Features", "Score");

            // Final stage: logistic regression over the augmented feature vector.
            var regressionArguments = new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f };
            var learner = new LogisticRegression(environment, regressionArguments);
            var examples = new RoleMappedData(featuresWithScore, label: "Label", feature: "Features");
            return learner.Train(examples);
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<PackageReference Include="BenchmarkDotNet" Version="$(BenchmarkDotNetVersion)" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML\Microsoft.ML.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using Microsoft.ML.Data;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System;
Expand All @@ -19,6 +21,7 @@ public class StochasticDualCoordinateAscentClassifierBench
internal static ClassificationMetrics s_metrics;
private static PredictionModel<IrisData, IrisPrediction> s_trainedModel;
private static string s_dataPath;
private static string s_sentimentDataPath;
private static IrisData[][] s_batches;
private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 };
private readonly Random r = new Random(0);
Expand All @@ -35,10 +38,11 @@ public class StochasticDualCoordinateAscentClassifierBench
public void Setup()
{
s_dataPath = Program.GetDataPath("iris.txt");
s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv");
s_trainedModel = TrainCore();
IrisPrediction prediction = s_trainedModel.Predict(s_example);

var testData = new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var testData = new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var evaluator = new ClassificationEvaluator();
s_metrics = evaluator.Evaluate(s_trainedModel, testData);

Expand Down Expand Up @@ -69,6 +73,9 @@ public void Setup()
/// <summary>
/// Benchmark: runs the trained model over the batch stored at index 2 of s_batches and
/// enumerates the predictions through Consume.
/// NOTE(review): index 2 presumably corresponds to the batch size 5 in s_batchSizes — confirm against Setup.
/// </summary>
[Benchmark]
public void PredictIrisBatchOf5()
{
    Consume(s_trainedModel.Predict(s_batches[2]));
}

/// <summary>Benchmarked entry point; forwards to TrainSentimentCore.</summary>
/// <returns>The predictor returned by TrainSentimentCore.</returns>
[Benchmark]
public IPredictor TrainSentiment()
{
    return TrainSentimentCore();
}

private void Consume(IEnumerable<IrisPrediction> predictions)
{
foreach (var prediction in predictions)
Expand All @@ -79,7 +86,7 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
{
var pipeline = new LearningPipeline();

pipeline.Add(new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
"SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

Expand All @@ -89,6 +96,76 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
return model;
}

/// <summary>
/// Builds the sentiment pipeline (text loader, TextTransform, WordEmbeddingsTransform) over the
/// file at s_sentimentDataPath and trains an SdcaMultiClassTrainer on the result.
/// </summary>
/// <returns>The predictor returned by the trainer.</returns>
private static IPredictor TrainSentimentCore()
{
    var sentimentFile = s_sentimentDataPath;
    using (var environment = new TlcEnvironment(seed: 1))
    {
        // Input: tab-separated file with a header row; column 0 is the label, column 1 the text.
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
            Type = DataKind.Num
        };
        var textColumn = new TextLoader.Column()
        {
            Name = "SentimentText",
            Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
            Type = DataKind.Text
        };
        var loaderArguments = new TextLoader.Arguments()
        {
            AllowQuoting = false,
            AllowSparse = false,
            Separator = "tab",
            HasHeader = true,
            Column = new[] { labelColumn, textColumn }
        };
        var source = new TextLoader(environment, loaderArguments, new MultiFileSource(sentimentFile));

        // Text normalization: lower-case, drop diacritics/punctuation, remove predefined stop words.
        // Both feature extractors are set to null and token output is requested.
        var textArguments = new TextTransform.Arguments()
        {
            Column = new TextTransform.Column
            {
                Name = "WordEmbeddings",
                Source = new[] { "SentimentText" }
            },
            KeepDiacritics = false,
            KeepPunctuations = false,
            TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
            OutputTokens = true,
            StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
            VectorNormalizer = TextTransform.TextNormKind.None,
            CharFeatureExtractor = null,
            WordFeatureExtractor = null,
        };
        var normalizedText = TextTransform.Create(environment, textArguments, source);

        // Map the "WordEmbeddings_TransformedText" column to "Features" using the Sswe pretrained model.
        // NOTE(review): that column is presumably the token output of the TextTransform above — confirm.
        var embeddingArguments = new WordEmbeddingsTransform.Arguments()
        {
            Column = new WordEmbeddingsTransform.Column[1]
            {
                new WordEmbeddingsTransform.Column
                {
                    Name = "Features",
                    Source = "WordEmbeddings_TransformedText"
                }
            },
            ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
        };
        var embedded = new WordEmbeddingsTransform(environment, embeddingArguments, normalizedText);

        // Train SDCA (capped at 20 iterations) on the embedded features.
        var trainerArguments = new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 };
        var learner = new SdcaMultiClassTrainer(environment, trainerArguments);
        var examples = new RoleMappedData(embedded, label: "Label", feature: "Features");
        return learner.Train(examples);
    }
}

public class IrisData
{
[Column("0")]
Expand Down