From f18f9273820d43925f04d0643fa978457656297d Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 16:45:06 +0200 Subject: [PATCH 01/12] simplify and cleanup the code, remove dead code --- .../KMeansAndLogisticRegressionBench.cs | 18 +-- ...sticDualCoordinateAscentClassifierBench.cs | 153 +++++++++--------- 2 files changed, 77 insertions(+), 94 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index f13bb7260e..f567c11cac 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -3,9 +3,7 @@ // See the LICENSE file in the project root for more information. using BenchmarkDotNet.Attributes; -using BenchmarkDotNet.Running; using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Learners; @@ -14,21 +12,11 @@ namespace Microsoft.ML.Benchmarks { public class KMeansAndLogisticRegressionBench { - private static string s_dataPath; + private readonly string _dataPath = Program.GetDataPath("adult.train"); [Benchmark] - public IPredictor TrainKMeansAndLR() => TrainKMeansAndLRCore(); - - [GlobalSetup] - public void Setup() + public IPredictor TrainKMeansAndLR() { - s_dataPath = Program.GetDataPath("adult.train"); - } - - private static IPredictor TrainKMeansAndLRCore() - { - string dataPath = s_dataPath; - using (var env = new TlcEnvironment(seed: 1)) { // Pipeline @@ -67,7 +55,7 @@ private static IPredictor TrainKMeansAndLRCore() Type = DataKind.R4 } } - }, new MultiFileSource(dataPath)); + }, new MultiFileSource(_dataPath)); IDataTransform trans = CategoricalTransform.Create(env, new CategoricalTransform.Arguments { diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs 
b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index ce8cb1aad0..637346a374 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -11,22 +11,18 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; -using System; using System.Collections.Generic; namespace Microsoft.ML.Benchmarks { public class StochasticDualCoordinateAscentClassifierBench { - internal static ClassificationMetrics s_metrics; - private static PredictionModel s_trainedModel; - private static string s_dataPath; - private static string s_sentimentDataPath; - private static IrisData[][] s_batches; - private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 }; - private readonly Random r = new Random(0); - private readonly Consumer _consumer = new Consumer(); - private static readonly IrisData s_example = new IrisData() + private readonly string _dataPath = Program.GetDataPath("iris.txt"); + private readonly string _sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); + private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; + private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, SepalWidth = 1.6f, @@ -34,71 +30,29 @@ public class StochasticDualCoordinateAscentClassifierBench PetalWidth = 5.1f, }; - [GlobalSetup] - public void Setup() - { - s_dataPath = Program.GetDataPath("iris.txt"); - s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); - s_trainedModel = TrainCore(); - IrisPrediction prediction = s_trainedModel.Predict(s_example); - - var testData = new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true); - var evaluator = new ClassificationEvaluator(); - s_metrics = 
evaluator.Evaluate(s_trainedModel, testData); - - s_batches = new IrisData[s_batchSizes.Length][]; - for (int i = 0; i < s_batches.Length; i++) - { - var batch = new IrisData[s_batchSizes[i]]; - s_batches[i] = batch; - for (int bi = 0; bi < batch.Length; bi++) - { - batch[bi] = s_example; - } - } - } - - [Benchmark] - public PredictionModel TrainIris() => TrainCore(); + private PredictionModel _trainedModel; + private IrisData[][] _batches; - [Benchmark] - public float[] PredictIris() => s_trainedModel.Predict(s_example).PredictedLabels; - - [Benchmark] - public void PredictIrisBatchOf1() => Consume(s_trainedModel.Predict(s_batches[0])); - - [Benchmark] - public void PredictIrisBatchOf2() => Consume(s_trainedModel.Predict(s_batches[1])); - - [Benchmark] - public void PredictIrisBatchOf5() => Consume(s_trainedModel.Predict(s_batches[2])); + internal static ClassificationMetrics s_metrics; [Benchmark] - public IPredictor TrainSentiment() => TrainSentimentCore(); + public PredictionModel TrainIris() => Train(_dataPath); - private void Consume(IEnumerable predictions) - { - foreach (var prediction in predictions) - _consumer.Consume(prediction); - } - - private static PredictionModel TrainCore() + private PredictionModel Train(string dataPath) { var pipeline = new LearningPipeline(); - pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + pipeline.Add(new Data.TextLoader(dataPath).CreateFrom(useHeader: true)); + pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - PredictionModel model = pipeline.Train(); - return model; + return pipeline.Train(); } - private static IPredictor TrainSentimentCore() + [Benchmark] + public IPredictor TrainSentiment() { - var dataPath = s_sentimentDataPath; using 
(var env = new TlcEnvironment(seed: 1)) { // Pipeline @@ -125,7 +79,7 @@ private static IPredictor TrainSentimentCore() Type = DataKind.Text } } - }, new MultiFileSource(dataPath)); + }, new MultiFileSource(_sentimentDataPath)); var text = TextTransform.Create(env, new TextTransform.Arguments() @@ -145,7 +99,7 @@ private static IPredictor TrainSentimentCore() WordFeatureExtractor = null, }, loader); - var trans = new WordEmbeddingsTransform(env, + var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() { Column = new WordEmbeddingsTransform.Column[1] @@ -162,32 +116,73 @@ private static IPredictor TrainSentimentCore() // Train var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); + return trainer.Train(trainRoles); } } - public class IrisData + [GlobalSetup] + public void Setup() { - [Column("0")] - public float Label; + _trainedModel = Train(_dataPath); + IrisPrediction prediction = _trainedModel.Predict(_example); - [Column("1")] - public float SepalLength; + var testData = new Data.TextLoader(_dataPath).CreateFrom(useHeader: true); + var evaluator = new ClassificationEvaluator(); + s_metrics = evaluator.Evaluate(_trainedModel, testData); - [Column("2")] - public float SepalWidth; + _batches = new IrisData[_batchSizes.Length][]; + for (int i = 0; i < _batches.Length; i++) + { + var batch = new IrisData[_batchSizes[i]]; + _batches[i] = batch; + for (int bi = 0; bi < batch.Length; bi++) + { + batch[bi] = _example; + } + } + } - [Column("3")] - public float PetalLength; + [Benchmark] + public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels; - [Column("4")] - public float PetalWidth; - } + [Benchmark] + public void PredictIrisBatchOf1() => Consume(_trainedModel.Predict(_batches[0])); + + [Benchmark] + public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1])); 
- public class IrisPrediction + [Benchmark] + public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2])); + + private void Consume(IEnumerable predictions) { - [ColumnName("Score")] - public float[] PredictedLabels; + foreach (var prediction in predictions) + _consumer.Consume(prediction); } } + + public class IrisData + { + [Column("0")] + public float Label; + + [Column("1")] + public float SepalLength; + + [Column("2")] + public float SepalWidth; + + [Column("3")] + public float PetalLength; + + [Column("4")] + public float PetalWidth; + } + + public class IrisPrediction + { + [ColumnName("Score")] + public float[] PredictedLabels; + } } From 058ab8934a1139efb8886114689270fa1dcad681 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 16:45:48 +0200 Subject: [PATCH 02/12] use `Target` to specify that given setup method should be executed for selected benchmarks, not all --- .../StochasticDualCoordinateAscentClassifierBench.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 637346a374..a5251f8311 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -121,8 +121,8 @@ public IPredictor TrainSentiment() } } - [GlobalSetup] - public void Setup() + [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })] + public void SetupPredictBenchmarks() { _trainedModel = Train(_dataPath); IrisPrediction prediction = _trainedModel.Predict(_example); From c09024fc00c10e66409bac2ff6c6ebc679bdf6e4 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 17:18:32 +0200 Subject: [PATCH 03/12] consume the result of Predict to make sure it does not 
get dead-code eliminated --- .../StochasticDualCoordinateAscentClassifierBench.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index a5251f8311..ec7d960740 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -125,7 +125,7 @@ public IPredictor TrainSentiment() public void SetupPredictBenchmarks() { _trainedModel = Train(_dataPath); - IrisPrediction prediction = _trainedModel.Predict(_example); + _consumer.Consume(_trainedModel.Predict(_example)); var testData = new Data.TextLoader(_dataPath).CreateFrom(useHeader: true); var evaluator = new ClassificationEvaluator(); From 1ea36e22a2dd754cd96b6be988200c8498753127 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 21:30:52 +0200 Subject: [PATCH 04/12] reference input files from .csproj and copy them to output dir, don't rely on hardcoded folder hierarchy --- .../Microsoft.ML.Benchmarks.csproj | 12 ++++++++++++ test/Microsoft.ML.Benchmarks/Program.cs | 10 +--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 5a9f3e7467..dfa673ea82 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -20,4 +20,16 @@ + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index 0b4e9edc52..2a470f6128 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -34,15 +34,7 @@ private static IConfig CreateCustomConfig() 
.With(MemoryDiagnoser.Default); internal static string GetDataPath(string name) - => Path.GetFullPath(Path.Combine(_dataRoot, name)); - - static readonly string _dataRoot; - static Program() - { - var currentAssemblyLocation = new FileInfo(typeof(Program).Assembly.Location); - var rootDir = currentAssemblyLocation.Directory.Parent.Parent.Parent.Parent.FullName; - _dataRoot = Path.Combine(rootDir, "test", "data"); - } + => Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); } public class ClassificationMetricsColumn : IColumn From a6ff27ac7ebf73b43c203ef256e3c88e1643a4fc Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 21:55:10 +0200 Subject: [PATCH 05/12] every ML.NET benchmark allocates a lot of memory and should be executed in a dedicated process --- .../Harness/ProjectGenerator.cs | 54 +++++++++++++++++++ test/Microsoft.ML.Benchmarks/Program.cs | 18 ++++++- 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs diff --git a/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs new file mode 100644 index 0000000000..ca4f65d323 --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using BenchmarkDotNet.Extensions; +using BenchmarkDotNet.Toolchains; +using BenchmarkDotNet.Toolchains.CsProj; +using System; +using System.IO; +using System.Linq; + +namespace Microsoft.ML.Benchmarks.Harness +{ + /// + /// to prevent the side effects of one benchmark from affecting another, BenchmarkDotNet runs every benchmark in a standalone, dedicated process + /// however, to do that it needs to be able to create, build and run a new executable + /// + /// the problem with ML.NET is that it has native dependencies, which are NOT copied by MSBuild to the output folder + /// in the case where A has a native dependency and B references A + /// + /// this is why this class exists: to copy the native dependencies to the folder with the .exe + /// + public class ProjectGenerator : CsProjGenerator + { + public ProjectGenerator(string targetFrameworkMoniker) : base(targetFrameworkMoniker, platform => platform.ToConfig(), null) + { + } + + protected override void CopyAllRequiredFiles(ArtifactsPaths artifactsPaths) + { + base.CopyAllRequiredFiles(artifactsPaths); + + CopyMissingNativeDependencies(artifactsPaths); + } + + private void CopyMissingNativeDependencies(ArtifactsPaths artifactsPaths) + { + var foldeWithAutogeneratedExe = Path.GetDirectoryName(artifactsPaths.ExecutablePath); + var folderWithNativeDependencies = Path.GetDirectoryName(typeof(ProjectGenerator).Assembly.Location); + + foreach(var nativeDependency in Directory + .EnumerateFiles(folderWithNativeDependencies) + .Where(fileName => ContainsWithIgnoreCase(fileName, "native"))) + { + File.Copy( + sourceFileName: nativeDependency, + destFileName: Path.Combine(foldeWithAutogeneratedExe, Path.GetFileName(nativeDependency)), + overwrite: true); + } + } + + bool ContainsWithIgnoreCase(string text, string word) => text != null && text.IndexOf(word, StringComparison.InvariantCultureIgnoreCase) >= 0; + } +} diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index 2a470f6128..857b17cc28 100644 --- 
a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -8,7 +8,12 @@ using BenchmarkDotNet.Running; using BenchmarkDotNet.Columns; using BenchmarkDotNet.Reports; +using BenchmarkDotNet.Toolchains; +using BenchmarkDotNet.Toolchains.CsProj; +using BenchmarkDotNet.Toolchains.DotNetCli; using BenchmarkDotNet.Toolchains.InProcess; +using BenchmarkDotNet.Validators; +using Microsoft.ML.Benchmarks.Harness; using System.IO; using Microsoft.ML.Models; @@ -29,10 +34,21 @@ private static IConfig CreateCustomConfig() => DefaultConfig.Instance .With(Job.Default .WithMaxIterationCount(20) - .With(InProcessToolchain.Instance)) .With(new ClassificationMetricsColumn("AccuracyMacro", "Macro-average accuracy of the model")) + .With(CreateToolchain())) .With(MemoryDiagnoser.Default); + private static IToolchain CreateToolchain() + { + var csProj = CsProjCoreToolchain.Current.Value; + var tfm = NetCoreAppSettings.Current.Value.TargetFrameworkMoniker; + + return new Toolchain( + tfm, + new ProjectGenerator(tfm), + csProj.Builder, + csProj.Executor); + } internal static string GetDataPath(string name) => Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); } From da080f15b4f4f1c03736654d629bb11332b9ac2f Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 21:59:35 +0200 Subject: [PATCH 06/12] make it possible for every type to report different metrics --- .../Harness/Metrics.cs | 110 ++++++++++++++++++ test/Microsoft.ML.Benchmarks/Program.cs | 40 +------ ...sticDualCoordinateAscentClassifierBench.cs | 14 ++- 3 files changed, 123 insertions(+), 41 deletions(-) create mode 100644 test/Microsoft.ML.Benchmarks/Harness/Metrics.cs diff --git a/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs new file mode 100644 index 0000000000..11b670cdcd --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs @@ -0,0 +1,110 @@ +// Licensed to the .NET Foundation 
under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Columns; +using BenchmarkDotNet.Reports; +using BenchmarkDotNet.Running; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Microsoft.ML.Benchmarks +{ + public abstract class WithExtraMetrics + { + protected abstract IEnumerable GetMetrics(); + + /// + /// this method is executed after running the benchmarks + /// we use it as a hack to simply print to the console so ExtraMetricColumn can parse the output + /// + [GlobalCleanup] + public void ReportMetrics() + { + foreach (var metric in GetMetrics()) + { + Console.WriteLine(metric.ToParsableString()); + } + } + } + + public class ExtraMetricColumn : IColumn + { + public string ColumnName => "Extra Metric"; + public string Id => nameof(ExtraMetricColumn); + public string Legend => "Value of the provided extra metric"; + public bool IsNumeric => true; + public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true; + public bool IsAvailable(Summary summary) => true; + public bool AlwaysShow => true; + public ColumnCategory Category => ColumnCategory.Custom; + public int PriorityInCategory => 1; + public UnitType UnitType => UnitType.Dimensionless; + public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null); + public override string ToString() => ColumnName; + + public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style) + { + if (!summary.HasReport(benchmark)) + return "-"; + + var results = summary[benchmark].ExecuteResults; + if (results.Count != 1) + return "-"; + + var result = results.Single(); + var buffer = new StringBuilder(); + + foreach (var line in result.ExtraOutput) + { + if (Metric.TryParse(line, out Metric metric)) + { + if (buffer.Length > 0) + 
buffer.Append(", "); + + buffer.Append(metric.ToColumnValue()); + } + } + + return buffer.Length > 0 ? buffer.ToString() : "-"; + } + } + + public struct Metric + { + private const string Prefix = "// Metric"; + private const char Separator = '#'; + + public string Name { get; } + public string Value { get; } + + public Metric(string name, string value) : this() + { + Name = name; + Value = value; + } + + public string ToColumnValue() + => $"{Name}: {Value}"; + + public string ToParsableString() + => $"{Prefix} {Separator} {Name} {Separator} {Value}"; + + public static bool TryParse(string line, out Metric metric) + { + metric = default; + + if (!line.StartsWith(Prefix)) + return false; + + var splitted = line.Split(Separator); + + metric = new Metric(splitted[1].Trim(), splitted[2].Trim()); + + return true; + } + } +} diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index 857b17cc28..81f68aefe7 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -6,16 +6,11 @@ using BenchmarkDotNet.Diagnosers; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Running; -using BenchmarkDotNet.Columns; -using BenchmarkDotNet.Reports; using BenchmarkDotNet.Toolchains; using BenchmarkDotNet.Toolchains.CsProj; using BenchmarkDotNet.Toolchains.DotNetCli; -using BenchmarkDotNet.Toolchains.InProcess; -using BenchmarkDotNet.Validators; using Microsoft.ML.Benchmarks.Harness; using System.IO; -using Microsoft.ML.Models; namespace Microsoft.ML.Benchmarks { @@ -34,8 +29,8 @@ private static IConfig CreateCustomConfig() => DefaultConfig.Instance .With(Job.Default .WithMaxIterationCount(20) - .With(new ClassificationMetricsColumn("AccuracyMacro", "Macro-average accuracy of the model")) .With(CreateToolchain())) + .With(new ExtraMetricColumn()) .With(MemoryDiagnoser.Default); private static IToolchain CreateToolchain() @@ -49,39 +44,8 @@ private static IToolchain CreateToolchain() csProj.Builder, 
csProj.Executor); } + internal static string GetDataPath(string name) => Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); } - - public class ClassificationMetricsColumn : IColumn - { - private readonly string _metricName; - private readonly string _legend; - - public ClassificationMetricsColumn(string metricName, string legend) - { - _metricName = metricName; - _legend = legend; - } - - public string ColumnName => _metricName; - public string Id => _metricName; - public string Legend => _legend; - public bool IsNumeric => true; - public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true; - public bool IsAvailable(Summary summary) => true; - public bool AlwaysShow => true; - public ColumnCategory Category => ColumnCategory.Custom; - public int PriorityInCategory => 1; - public UnitType UnitType => UnitType.Dimensionless; - - public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style) - { - var property = typeof(ClassificationMetrics).GetProperty(_metricName); - return property.GetValue(StochasticDualCoordinateAscentClassifierBench.s_metrics).ToString(); - } - public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null); - - public override string ToString() => ColumnName; - } } diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index ec7d960740..89c18d8ef6 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -12,10 +12,11 @@ using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using System.Collections.Generic; +using System.Globalization; namespace Microsoft.ML.Benchmarks { - public class StochasticDualCoordinateAscentClassifierBench + public class StochasticDualCoordinateAscentClassifierBench : 
WithExtraMetrics { private readonly string _dataPath = Program.GetDataPath("iris.txt"); private readonly string _sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); @@ -32,8 +33,15 @@ public class StochasticDualCoordinateAscentClassifierBench private PredictionModel _trainedModel; private IrisData[][] _batches; + private ClassificationMetrics _metrics; - internal static ClassificationMetrics s_metrics; + protected override IEnumerable GetMetrics() + { + if (_metrics != null) + yield return new Metric( + nameof(ClassificationMetrics.AccuracyMacro), + _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture)); + } [Benchmark] public PredictionModel TrainIris() => Train(_dataPath); @@ -129,7 +137,7 @@ public void SetupPredictBenchmarks() var testData = new Data.TextLoader(_dataPath).CreateFrom(useHeader: true); var evaluator = new ClassificationEvaluator(); - s_metrics = evaluator.Evaluate(_trainedModel, testData); + _metrics = evaluator.Evaluate(_trainedModel, testData); _batches = new IrisData[_batchSizes.Length][]; for (int i = 0; i < _batches.Length; i++) From 4e90e06fc2f864c2f187e96b8c49740b75305125 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 22:54:17 +0200 Subject: [PATCH 07/12] enforce current culture as "en-us" because the input data files use dot as decimal separator (and it fails for cultures with ",") --- .../KMeansAndLogisticRegressionBench.cs | 2 +- test/Microsoft.ML.Benchmarks/Program.cs | 11 +++++++++-- .../StochasticDualCoordinateAscentClassifierBench.cs | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index f567c11cac..c07ce3c3a1 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -12,7 +12,7 @@ namespace Microsoft.ML.Benchmarks { public 
class KMeansAndLogisticRegressionBench { - private readonly string _dataPath = Program.GetDataPath("adult.train"); + private readonly string _dataPath = Program.GetInvariantCultureDataPath("adult.train"); [Benchmark] public IPredictor TrainKMeansAndLR() diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index 81f68aefe7..6c5a31b267 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -10,7 +10,9 @@ using BenchmarkDotNet.Toolchains.CsProj; using BenchmarkDotNet.Toolchains.DotNetCli; using Microsoft.ML.Benchmarks.Harness; +using System.Globalization; using System.IO; +using System.Threading; namespace Microsoft.ML.Benchmarks { @@ -45,7 +47,12 @@ private static IToolchain CreateToolchain() csProj.Executor); } - internal static string GetDataPath(string name) - => Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); + internal static string GetInvariantCultureDataPath(string name) + { + // enforce the invariant culture on the current thread because the input data files use dot as the decimal separator (and it fails for cultures with ",") + Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture; + + return Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); + } } } diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 89c18d8ef6..9b143f4691 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -18,8 +18,8 @@ namespace Microsoft.ML.Benchmarks { public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics { - private readonly string _dataPath = Program.GetDataPath("iris.txt"); - private readonly string _sentimentDataPath = 
Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); + private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt"); + private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; From fe38aeb6644fc48016c59e7726415c7e5dbe98bd Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 23:18:10 +0200 Subject: [PATCH 08/12] for our time consuming benchmarks 1 warmup iteration is enough --- test/Microsoft.ML.Benchmarks/Program.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index 6c5a31b267..b14520f6cf 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -30,6 +30,7 @@ static void Main(string[] args) private static IConfig CreateCustomConfig() => DefaultConfig.Instance .With(Job.Default + .WithWarmupCount(1) // for our time consuming benchmarks 1 warmup iteration is enough .WithMaxIterationCount(20) .With(CreateToolchain())) .With(new ExtraMetricColumn()) From 282b088cda2693bb077c5cccd356124b480e7184 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 23:33:37 +0200 Subject: [PATCH 09/12] workaround for the auto-generated code to avoid name conflict for Microsoft.ML.Runtime.IHost and BenchmarkDotNet.Engines.IHost.. 
--- .../KMeansAndLogisticRegressionBench.cs | 4 +++- .../StochasticDualCoordinateAscentClassifierBench.cs | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index c07ce3c3a1..0109ef5910 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -4,6 +4,7 @@ using BenchmarkDotNet.Attributes; using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Internal.Calibration; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Learners; @@ -15,7 +16,7 @@ public class KMeansAndLogisticRegressionBench private readonly string _dataPath = Program.GetInvariantCultureDataPath("adult.train"); [Benchmark] - public IPredictor TrainKMeansAndLR() + public ParameterMixingCalibratedPredictor TrainKMeansAndLR() { using (var env = new TlcEnvironment(seed: 1)) { @@ -77,6 +78,7 @@ public IPredictor TrainKMeansAndLR() // Train var trainer = new LogisticRegression(env, new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); + return trainer.Train(trainRoles); } } diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 9b143f4691..5262306d70 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -59,7 +59,7 @@ private PredictionModel Train(string dataPath) } [Benchmark] - public IPredictor TrainSentiment() + public void TrainSentiment() { using (var env = new TlcEnvironment(seed: 1)) { @@ -125,7 +125,8 @@ public IPredictor TrainSentiment() var trainer = new 
SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - return trainer.Train(trainRoles); + var predicted = trainer.Train(trainRoles); + _consumer.Consume(predicted); } } From 8c4e9b980f3c7e1b117a8fc8205aacdb40cc5c36 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 23:34:02 +0200 Subject: [PATCH 10/12] add comment about why we need a custom toolchain --- test/Microsoft.ML.Benchmarks/Program.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs index b14520f6cf..5396e17b7b 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -36,6 +36,9 @@ private static IConfig CreateCustomConfig() .With(new ExtraMetricColumn()) .With(MemoryDiagnoser.Default); + /// + /// we need our own toolchain because MSBuild by default does not copy recursive native dependencies to the output + /// private static IToolchain CreateToolchain() { var csProj = CsProjCoreToolchain.Current.Value; From 5bfa49253f6df62161ccf59102aaba559b7531cf Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Sun, 26 Aug 2018 23:34:28 +0200 Subject: [PATCH 11/12] update BDN version to allow benchmarking with CoreRun --- build/Dependencies.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Dependencies.props b/build/Dependencies.props index 3a46917114..e59a795ee2 100644 --- a/build/Dependencies.props +++ b/build/Dependencies.props @@ -10,6 +10,6 @@ 2.1.2.2 0.0.0.5 4.5.0 - 0.11.0 + 0.11.1 From ec3df9f32b5b8a020c3705399e924b5626da19ac Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 29 Aug 2018 12:38:01 +0200 Subject: [PATCH 12/12] code review fix: spacing --- test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs index ca4f65d323..7560efe562 100644 --- a/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs +++ b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs @@ -38,7 +38,7 @@ private void CopyMissingNativeDependencies(ArtifactsPaths artifactsPaths) var foldeWithAutogeneratedExe = Path.GetDirectoryName(artifactsPaths.ExecutablePath); var folderWithNativeDependencies = Path.GetDirectoryName(typeof(ProjectGenerator).Assembly.Location); - foreach(var nativeDependency in Directory + foreach (var nativeDependency in Directory .EnumerateFiles(folderWithNativeDependencies) .Where(fileName => ContainsWithIgnoreCase(fileName, "native"))) {