Skip to content

different config files for train and predict benchmarks #954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 40 additions & 26 deletions test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,13 @@

namespace Microsoft.ML.Benchmarks
{
public class Ranking
[Config(typeof(TrainConfig))]
public class RankingTrain
{
private string _mslrWeb10k_Validate;
private string _mslrWeb10k_Train;
private string _mslrWeb10k_Test;
private string _modelPath_MSLR;

[GlobalSetup(Targets = new string[] {
nameof(TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking),
nameof(TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking) })]
[GlobalSetup]
public void SetupTrainingSpeedTests()
{
_mslrWeb10k_Validate = Path.GetFullPath(TestDatasets.MSLRWeb.validFilename);
Expand All @@ -33,24 +30,15 @@ public void SetupTrainingSpeedTests()
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Train));
}

[GlobalSetup(Target = nameof(Test_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking))]
public void SetupScoringSpeedTests()
[Benchmark]
public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking()
{
_mslrWeb10k_Test = Path.GetFullPath(TestDatasets.MSLRWeb.testFilename);
if (!File.Exists(_mslrWeb10k_Test))
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Test));

SetupTrainingSpeedTests();
_modelPath_MSLR = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip");

string cmd = @"TrainTest test=" + _mslrWeb10k_Validate +
" eval=RankingEvaluator{t=10}" +
" data=" + _mslrWeb10k_Train +
" loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" +
" xf=HashTransform{col=GroupId}" +
" xf=NAHandleTransform{col=Features}" +
" tr=FastTreeRanking{}" +
" out={" + _modelPath_MSLR + "}";
" xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features}" +
" tr=FastTreeRanking{}";

using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Expand All @@ -59,31 +47,57 @@ public void SetupScoringSpeedTests()
}

[Benchmark]
public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking()
public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking()
{
string cmd = @"TrainTest test=" + _mslrWeb10k_Validate +
" eval=RankingEvaluator{t=10}" +
" data=" + _mslrWeb10k_Train +
" loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" +
" xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features}" +
" tr=FastTreeRanking{}";
" xf=HashTransform{col=GroupId}" +
" xf=NAHandleTransform{col=Features}" +
" tr=LightGBMRanking{}";

using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false);
}
}
}

[Benchmark]
public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking()
[Config(typeof(PredictConfig))]
public class RankingTest
{
private string _mslrWeb10k_Validate;
private string _mslrWeb10k_Train;
private string _mslrWeb10k_Test;
private string _modelPath_MSLR;

[GlobalSetup]
public void SetupScoringSpeedTests()
{
_mslrWeb10k_Test = Path.GetFullPath(TestDatasets.MSLRWeb.testFilename);
_mslrWeb10k_Validate = Path.GetFullPath(TestDatasets.MSLRWeb.validFilename);
_mslrWeb10k_Train = Path.GetFullPath(TestDatasets.MSLRWeb.trainFilename);

if (!File.Exists(_mslrWeb10k_Test))
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Test));

if (!File.Exists(_mslrWeb10k_Validate))
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Validate));

if (!File.Exists(_mslrWeb10k_Train))
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Train));

_modelPath_MSLR = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip");

string cmd = @"TrainTest test=" + _mslrWeb10k_Validate +
" eval=RankingEvaluator{t=10}" +
" data=" + _mslrWeb10k_Train +
" loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" +
" xf=HashTransform{col=GroupId}" +
" xf=NAHandleTransform{col=Features}" +
" tr=LightGBMRanking{}";
" tr=FastTreeRanking{}" +
" out={" + _modelPath_MSLR + "}";

using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Expand All @@ -95,7 +109,7 @@ public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking()
public void Test_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking()
{
// This benchmark is profiling bulk scoring speed and not training speed.
string cmd = @"Test data=" + _mslrWeb10k_Test + " in="+ _modelPath_MSLR;
string cmd = @"Test data=" + _mslrWeb10k_Test + " in=" + _modelPath_MSLR;
using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false);
Expand Down
24 changes: 24 additions & 0 deletions test/Microsoft.ML.Benchmarks/PredictConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Jobs;

namespace Microsoft.ML.Benchmarks
{
/// <summary>
/// BenchmarkDotNet configuration for the prediction (scoring) benchmarks.
/// Builds on <see cref="DefaultConfig.Instance"/> and adds a job with a single
/// warmup iteration, at most 20 iterations and the custom toolchain from
/// <c>Program.CreateToolchain()</c>, plus the extra metric column and the
/// memory diagnoser.
/// </summary>
internal class PredictConfig : ManualConfig
{
    /// <summary>
    /// Assembles the job and the resulting configuration, then registers it.
    /// </summary>
    public PredictConfig()
    {
        // Job settings: one warmup iteration, capped at 20 measured iterations.
        var predictJob = Job.Default
            .WithWarmupCount(1)
            .WithMaxIterationCount(20)
            .With(Program.CreateToolchain());

        // Default config extended with the job, the extra column and the memory diagnoser.
        var predictConfig = DefaultConfig.Instance
            .With(predictJob)
            .With(new ExtraMetricColumn())
            .With(MemoryDiagnoser.Default);

        Add(predictConfig);
    }
}
}
16 changes: 2 additions & 14 deletions test/Microsoft.ML.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
using BenchmarkDotNet.Toolchains;
using BenchmarkDotNet.Toolchains.CsProj;
Expand All @@ -25,21 +22,12 @@ class Program
static void Main(string[] args)
=> BenchmarkSwitcher
.FromAssembly(typeof(Program).Assembly)
.Run(args, CreateCustomConfig());

private static IConfig CreateCustomConfig()
=> DefaultConfig.Instance
.With(Job.Default
.WithWarmupCount(1) // for our time consuming benchmarks 1 warmup iteration is enough
.WithMaxIterationCount(20)
.With(CreateToolchain()))
.With(new ExtraMetricColumn())
.With(MemoryDiagnoser.Default);
.Run(args);

/// <summary>
/// We need our own toolchain because MSBuild by default does not copy recursive native dependencies to the output.
/// </summary>
private static IToolchain CreateToolchain()
internal static IToolchain CreateToolchain()
{
var csProj = CsProjCoreToolchain.Current.Value;
var tfm = NetCoreAppSettings.Current.Value.TargetFrameworkMoniker;
Expand Down
88 changes: 48 additions & 40 deletions test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,12 @@

namespace Microsoft.ML.Benchmarks
{
public class MultiClassClassification
[Config(typeof(TrainConfig))]
public class MultiClassClassificationTrain
{
private string _dataPath_Wiki;
private string _modelPath_Wiki;

[GlobalSetup(Targets = new string[] {
nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron),
nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass),
nameof(CV_Multiclass_WikiDetox_WordEmbeddings_OVAAveragedPerceptron),
nameof(CV_Multiclass_WikiDetox_WordEmbeddings_SDCAMC)})]
[GlobalSetup]
public void SetupTrainingSpeedTests()
{
_dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename);
Expand All @@ -29,26 +25,6 @@ public void SetupTrainingSpeedTests()
throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _dataPath_Wiki));
}

[GlobalSetup(Target = nameof(Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron))]
public void SetupScoringSpeedTests()
{
SetupTrainingSpeedTests();
_modelPath_Wiki = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.zip");

string cmd = @"CV k=5 data=" + _dataPath_Wiki +
" loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4}" +
" xf=CategoricalTransform{col=ns}" +
" xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" +
" xf=Concat{col=Features:FeaturesText,logged_in,ns}" +
" tr=OVA{p=AveragedPerceptron{iter=10}}" +
" out={" + _modelPath_Wiki + "}";

using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false);
}
}

[Benchmark]
public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
{
Expand All @@ -74,26 +50,15 @@ public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass()
" xf=Convert{col=logged_in type=R4}" +
" xf=CategoricalTransform{col=ns}" +
" xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" +
" xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=LightGBMMulticlass{}";
" xf=Concat{col=Features:FeaturesText,logged_in,ns}" +
" tr=LightGBMMulticlass{iter=10}";

using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false);
}
}

[Benchmark]
public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
{
// This benchmark is profiling bulk scoring speed and not training speed.
string modelpath = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.fold000.zip");
string cmd = @"Test data=" + _dataPath_Wiki + " in=" + modelpath;
using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
{
Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false);
}
}

[Benchmark]
public void CV_Multiclass_WikiDetox_WordEmbeddings_OVAAveragedPerceptron()
{
Expand Down Expand Up @@ -130,4 +95,47 @@ public void CV_Multiclass_WikiDetox_WordEmbeddings_SDCAMC()
}
}
}

/// <summary>
/// Scoring-speed benchmark for the WikiDetox multiclass scenario, run with
/// <see cref="PredictConfig"/>.
/// </summary>
[Config(typeof(PredictConfig))]
public class MultiClassClassificationTest
{
    private string _dataPath_Wiki;
    private string _modelPath_Wiki;

    /// <summary>
    /// Resolves the WikiDetox dataset path and runs a <c>CV k=5</c> Maml command
    /// whose <c>out=</c> argument points at <c>WikiModel.zip</c> in the current directory.
    /// </summary>
    /// <exception cref="FileNotFoundException">The dataset file does not exist.</exception>
    [GlobalSetup]
    public void SetupScoringSpeedTests()
    {
        _dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename);
        if (!File.Exists(_dataPath_Wiki))
        {
            throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _dataPath_Wiki));
        }

        _modelPath_Wiki = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.zip");

        // The command line is assembled from named segments; each segment carries its own leading space.
        const string loaderArgs =
            " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+}";
        const string transformArgs =
            " xf=Convert{col=logged_in type=R4}" +
            " xf=CategoricalTransform{col=ns}" +
            " xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" +
            " xf=Concat{col=Features:FeaturesText,logged_in,ns}";
        const string trainerArgs = " tr=OVA{p=AveragedPerceptron{iter=10}}";

        string trainCommand = @"CV k=5 data=" + _dataPath_Wiki +
            loaderArgs +
            transformArgs +
            trainerArgs +
            " out={" + _modelPath_Wiki + "}";

        RunMaml(trainCommand);
    }

    /// <summary>
    /// Runs the Maml <c>Test</c> command over the WikiDetox data using the
    /// <c>WikiModel.fold000.zip</c> model located in the current directory.
    /// </summary>
    [Benchmark]
    public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
    {
        // This benchmark is profiling bulk scoring speed and not training speed.
        // NOTE(review): the fold000 model is presumably written by the CV command in the setup - confirm.
        string foldModelPath = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.fold000.zip");
        string scoreCommand = @"Test data=" + _dataPath_Wiki + " in=" + foldModelPath;

        RunMaml(scoreCommand);
    }

    /// <summary>
    /// Executes a single Maml command line in a non-verbose console environment
    /// whose output goes to <c>EmptyWriter.Instance</c>.
    /// </summary>
    private static void RunMaml(string commandLine)
    {
        using (var env = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
        {
            Maml.MainCore(env, commandLine, alwaysPrintStacktrace: false);
        }
    }
}
}
25 changes: 25 additions & 0 deletions test/Microsoft.ML.Benchmarks/TrainConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Jobs;

namespace Microsoft.ML.Benchmarks
{
/// <summary>
/// BenchmarkDotNet configuration for the training benchmarks.
/// Builds on <see cref="DefaultConfig.Instance"/> and adds a job with no warmup,
/// one iteration per launch, three launches and the custom toolchain from
/// <c>Program.CreateToolchain()</c>, plus the extra metric column and the
/// memory diagnoser.
/// </summary>
public class TrainConfig : ManualConfig
{
    /// <summary>
    /// Assembles the job and the resulting configuration, then registers it.
    /// </summary>
    public TrainConfig()
    {
        // BDN will start 3 dedicated processes, each of them will just run given benchmark once,
        // without any warm up to mimic the real world.
        var trainJob = Job.Default
            .WithWarmupCount(0)
            .WithIterationCount(1)
            .WithLaunchCount(3)
            .With(Program.CreateToolchain());

        // Default config extended with the job, the extra column and the memory diagnoser.
        var trainConfig = DefaultConfig.Instance
            .With(trainJob)
            .With(new ExtraMetricColumn())
            .With(MemoryDiagnoser.Default);

        Add(trainConfig);
    }
}
}