diff --git a/build/Dependencies.props b/build/Dependencies.props
index 0b6af3cdc9..e880e8c66b 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -10,7 +10,7 @@
2.1.2.2
0.0.0.5
4.5.0
- 0.11.0
+ 0.11.1
1.10.0
diff --git a/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs
new file mode 100644
index 0000000000..11b670cdcd
--- /dev/null
+++ b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs
@@ -0,0 +1,110 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Columns;
+using BenchmarkDotNet.Reports;
+using BenchmarkDotNet.Running;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Microsoft.ML.Benchmarks
+{
+ public abstract class WithExtraMetrics
+ {
+ protected abstract IEnumerable GetMetrics();
+
+ /// <summary>
+ /// this method is executed after running the benchmarks
+ /// we use it as a hack: it simply prints each metric to the console so that ExtraMetricColumn can parse the output
+ /// </summary>
+ [GlobalCleanup]
+ public void ReportMetrics()
+ {
+ foreach (var metric in GetMetrics())
+ {
+ Console.WriteLine(metric.ToParsableString());
+ }
+ }
+ }
+
+    // Summary column that shows the metrics found in the ExtraOutput lines of a benchmark's execution result.
+    // Every line that Metric.TryParse accepts contributes one "Name: Value" entry to the cell.
+    public class ExtraMetricColumn : IColumn
+    {
+        private const string NoValue = "-";
+
+        public string Id => nameof(ExtraMetricColumn);
+        public string ColumnName => "Extra Metric";
+        public string Legend => "Value of the provided extra metric";
+        public ColumnCategory Category => ColumnCategory.Custom;
+        public int PriorityInCategory => 1;
+        public UnitType UnitType => UnitType.Dimensionless;
+        public bool IsNumeric => true;
+        public bool AlwaysShow => true;
+        public bool IsAvailable(Summary summary) => true;
+        public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true;
+
+        public override string ToString() => ColumnName;
+
+        public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null);
+
+        // Returns the parsed metrics joined with ", ", or "-" when the benchmark has no report,
+        // does not have exactly one execution result, or printed no parsable metric line.
+        public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style)
+        {
+            if (!summary.HasReport(benchmark))
+                return NoValue;
+
+            var executions = summary[benchmark].ExecuteResults;
+            if (executions.Count != 1)
+                return NoValue;
+
+            // lines that are not metrics map to null and are dropped before joining
+            var cells = executions.First().ExtraOutput
+                .Select(outputLine => Metric.TryParse(outputLine, out Metric parsed) ? parsed.ToColumnValue() : null)
+                .Where(cell => cell != null);
+
+            var joined = string.Join(", ", cells);
+            return joined.Length > 0 ? joined : NoValue;
+        }
+    }
+
+    public struct Metric // a named value that travels between processes as one console line: "// Metric # NAME # VALUE"
+    {
+        private const string Prefix = "// Metric";
+        private const char Separator = '#';
+
+        public string Name { get; }
+        public string Value { get; }
+
+        public Metric(string name, string value) : this()
+        {
+            Name = name;
+            Value = value;
+        }
+
+        // Text displayed in the summary table cell.
+        public string ToColumnValue() => $"{Name}: {Value}";
+
+        // Console representation; TryParse reads this format back.
+        public string ToParsableString() => $"{Prefix} {Separator} {Name} {Separator} {Value}";
+
+        // Returns false (metric stays default) for a null line, a line without the prefix, or a line lacking the
+        // name/value fields. The split is capped at three parts so a '#' inside the value is preserved.
+        public static bool TryParse(string line, out Metric metric)
+        {
+            metric = default;
+            if (line == null || !line.StartsWith(Prefix, StringComparison.Ordinal))
+                return false;
+            var parts = line.Split(new[] { Separator }, 3);
+            if (parts.Length < 3)
+                return false;
+            metric = new Metric(parts[1].Trim(), parts[2].Trim());
+            return true;
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs
new file mode 100644
index 0000000000..7560efe562
--- /dev/null
+++ b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs
@@ -0,0 +1,54 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Extensions;
+using BenchmarkDotNet.Toolchains;
+using BenchmarkDotNet.Toolchains.CsProj;
+using System;
+using System.IO;
+using System.Linq;
+
+namespace Microsoft.ML.Benchmarks.Harness
+{
+ /// <summary>
+ /// to prevent benchmarks from affecting each other through side effects, BenchmarkDotNet runs every benchmark in a standalone, dedicated process
+ /// however, to do that it needs to be able to create, build and run a new executable
+ ///
+ /// the problem with ML.NET is that it has native dependencies, which are NOT copied by MSBuild to the output folder
+ /// in the case where A has a native dependency and B references A
+ ///
+ /// this is why this class exists: to copy the native dependencies to the folder with the .exe
+ /// </summary>
+    public class ProjectGenerator : CsProjGenerator
+    {
+        public ProjectGenerator(string targetFrameworkMoniker) : base(targetFrameworkMoniker, platform => platform.ToConfig(), null)
+        {
+        }
+
+        // Performs the base file copy first, then adds the native dependencies on top of it.
+        protected override void CopyAllRequiredFiles(ArtifactsPaths artifactsPaths)
+        {
+            base.CopyAllRequiredFiles(artifactsPaths);
+            CopyMissingNativeDependencies(artifactsPaths);
+        }
+
+        // Copies every file whose name contains "native" from this assembly's folder to the folder of the generated executable.
+        private void CopyMissingNativeDependencies(ArtifactsPaths artifactsPaths)
+        {
+            var folderWithAutogeneratedExe = Path.GetDirectoryName(artifactsPaths.ExecutablePath);
+            var folderWithNativeDependencies = Path.GetDirectoryName(typeof(ProjectGenerator).Assembly.Location);
+
+            // EnumerateFiles yields names that include the directory path, so only the file name is matched;
+            // otherwise a parent directory containing "native" would make every file look like a dependency.
+            foreach (var nativeDependency in Directory.EnumerateFiles(folderWithNativeDependencies)
+                .Where(path => ContainsWithIgnoreCase(Path.GetFileName(path), "native")))
+            {
+                File.Copy(nativeDependency, Path.Combine(folderWithAutogeneratedExe, Path.GetFileName(nativeDependency)), overwrite: true);
+            }
+        }
+
+        // File names are identifiers, not linguistic text, so the comparison is ordinal (case-insensitive).
+        private static bool ContainsWithIgnoreCase(string text, string word) => text != null && text.IndexOf(word, StringComparison.OrdinalIgnoreCase) >= 0;
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
index 4c269e05fc..f96a5d8803 100644
--- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
+++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
@@ -4,6 +4,7 @@
using BenchmarkDotNet.Attributes;
using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.KMeans;
@@ -13,21 +14,11 @@ namespace Microsoft.ML.Benchmarks
{
public class KMeansAndLogisticRegressionBench
{
- private static string s_dataPath;
+ private readonly string _dataPath = Program.GetInvariantCultureDataPath("adult.train");
[Benchmark]
- public IPredictor TrainKMeansAndLR() => TrainKMeansAndLRCore();
-
- [GlobalSetup]
- public void Setup()
+ public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
{
- s_dataPath = Program.GetDataPath("adult.train");
- }
-
- private static IPredictor TrainKMeansAndLRCore()
- {
- string dataPath = s_dataPath;
-
using (var env = new TlcEnvironment(seed: 1))
{
// Pipeline
@@ -53,7 +44,7 @@ private static IPredictor TrainKMeansAndLRCore()
new TextLoader.Range() { Min = 10, Max = 12 }
})
}
- }, new MultiFileSource(dataPath));
+ }, new MultiFileSource(_dataPath));
IDataTransform trans = CategoricalTransform.Create(env, new CategoricalTransform.Arguments
{
@@ -83,4 +74,4 @@ private static IPredictor TrainKMeansAndLRCore()
}
}
}
-}
+}
\ No newline at end of file
diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
index 5a9f3e7467..dfa673ea82 100644
--- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
+++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
@@ -20,4 +20,16 @@
+
+
+
+ PreserveNewest
+
+
+ PreserveNewest
+
+
+ PreserveNewest
+
+
\ No newline at end of file
diff --git a/test/Microsoft.ML.Benchmarks/Program.cs b/test/Microsoft.ML.Benchmarks/Program.cs
index 0b4e9edc52..5396e17b7b 100644
--- a/test/Microsoft.ML.Benchmarks/Program.cs
+++ b/test/Microsoft.ML.Benchmarks/Program.cs
@@ -6,11 +6,13 @@
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
-using BenchmarkDotNet.Columns;
-using BenchmarkDotNet.Reports;
-using BenchmarkDotNet.Toolchains.InProcess;
+using BenchmarkDotNet.Toolchains;
+using BenchmarkDotNet.Toolchains.CsProj;
+using BenchmarkDotNet.Toolchains.DotNetCli;
+using Microsoft.ML.Benchmarks.Harness;
+using System.Globalization;
using System.IO;
-using Microsoft.ML.Models;
+using System.Threading;
namespace Microsoft.ML.Benchmarks
{
@@ -28,52 +30,33 @@ static void Main(string[] args)
private static IConfig CreateCustomConfig()
=> DefaultConfig.Instance
.With(Job.Default
+ .WithWarmupCount(1) // for our time consuming benchmarks 1 warmup iteration is enough
.WithMaxIterationCount(20)
- .With(InProcessToolchain.Instance))
- .With(new ClassificationMetricsColumn("AccuracyMacro", "Macro-average accuracy of the model"))
+ .With(CreateToolchain()))
+ .With(new ExtraMetricColumn())
.With(MemoryDiagnoser.Default);
- internal static string GetDataPath(string name)
- => Path.GetFullPath(Path.Combine(_dataRoot, name));
-
- static readonly string _dataRoot;
- static Program()
+ ///
+ /// we need our own toolchain because MSBuild by default does not copy recursive native dependencies to the output
+ ///
+ private static IToolchain CreateToolchain()
{
- var currentAssemblyLocation = new FileInfo(typeof(Program).Assembly.Location);
- var rootDir = currentAssemblyLocation.Directory.Parent.Parent.Parent.Parent.FullName;
- _dataRoot = Path.Combine(rootDir, "test", "data");
+ var csProj = CsProjCoreToolchain.Current.Value;
+ var tfm = NetCoreAppSettings.Current.Value.TargetFrameworkMoniker;
+
+ return new Toolchain(
+ tfm,
+ new ProjectGenerator(tfm),
+ csProj.Builder,
+ csProj.Executor);
}
- }
-
- public class ClassificationMetricsColumn : IColumn
- {
- private readonly string _metricName;
- private readonly string _legend;
- public ClassificationMetricsColumn(string metricName, string legend)
+ internal static string GetInvariantCultureDataPath(string name)
{
- _metricName = metricName;
- _legend = legend;
- }
-
- public string ColumnName => _metricName;
- public string Id => _metricName;
- public string Legend => _legend;
- public bool IsNumeric => true;
- public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true;
- public bool IsAvailable(Summary summary) => true;
- public bool AlwaysShow => true;
- public ColumnCategory Category => ColumnCategory.Custom;
- public int PriorityInCategory => 1;
- public UnitType UnitType => UnitType.Dimensionless;
+ // enforce the invariant culture because the input data files use a dot as the decimal separator (and parsing fails for cultures that use ",")
+ Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
- public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style)
- {
- var property = typeof(ClassificationMetrics).GetProperty(_metricName);
- return property.GetValue(StochasticDualCoordinateAscentClassifierBench.s_metrics).ToString();
+ return Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name);
}
- public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null);
-
- public override string ToString() => ColumnName;
}
}
diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
index 6e0b856dbd..b0c9235198 100644
--- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
+++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -11,22 +11,19 @@
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
-using System;
using System.Collections.Generic;
+using System.Globalization;
namespace Microsoft.ML.Benchmarks
{
- public class StochasticDualCoordinateAscentClassifierBench
+ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics
{
- internal static ClassificationMetrics s_metrics;
- private static PredictionModel s_trainedModel;
- private static string s_dataPath;
- private static string s_sentimentDataPath;
- private static IrisData[][] s_batches;
- private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 };
- private readonly Random r = new Random(0);
- private readonly Consumer _consumer = new Consumer();
- private static readonly IrisData s_example = new IrisData()
+ private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt");
+ private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv");
+ private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination
+
+ private readonly int[] _batchSizes = new int[] { 1, 2, 5 };
+ private readonly IrisData _example = new IrisData()
{
SepalLength = 3.3f,
SepalWidth = 1.6f,
@@ -34,71 +31,36 @@ public class StochasticDualCoordinateAscentClassifierBench
PetalWidth = 5.1f,
};
- [GlobalSetup]
- public void Setup()
- {
- s_dataPath = Program.GetDataPath("iris.txt");
- s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv");
- s_trainedModel = TrainCore();
- IrisPrediction prediction = s_trainedModel.Predict(s_example);
-
- var testData = new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true);
- var evaluator = new ClassificationEvaluator();
- s_metrics = evaluator.Evaluate(s_trainedModel, testData);
+ private PredictionModel _trainedModel;
+ private IrisData[][] _batches;
+ private ClassificationMetrics _metrics;
- s_batches = new IrisData[s_batchSizes.Length][];
- for (int i = 0; i < s_batches.Length; i++)
- {
- var batch = new IrisData[s_batchSizes[i]];
- s_batches[i] = batch;
- for (int bi = 0; bi < batch.Length; bi++)
- {
- batch[bi] = s_example;
- }
- }
+ protected override IEnumerable GetMetrics()
+ {
+ if (_metrics != null)
+ yield return new Metric(
+ nameof(ClassificationMetrics.AccuracyMacro),
+ _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture));
}
[Benchmark]
- public PredictionModel TrainIris() => TrainCore();
-
- [Benchmark]
- public float[] PredictIris() => s_trainedModel.Predict(s_example).PredictedLabels;
-
- [Benchmark]
- public void PredictIrisBatchOf1() => Consume(s_trainedModel.Predict(s_batches[0]));
+ public PredictionModel TrainIris() => Train(_dataPath);
- [Benchmark]
- public void PredictIrisBatchOf2() => Consume(s_trainedModel.Predict(s_batches[1]));
-
- [Benchmark]
- public void PredictIrisBatchOf5() => Consume(s_trainedModel.Predict(s_batches[2]));
-
- [Benchmark]
- public IPredictor TrainSentiment() => TrainSentimentCore();
-
- private void Consume(IEnumerable predictions)
- {
- foreach (var prediction in predictions)
- _consumer.Consume(prediction);
- }
-
- private static PredictionModel TrainCore()
+ private PredictionModel Train(string dataPath)
{
var pipeline = new LearningPipeline();
- pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true));
- pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
- "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
+ pipeline.Add(new Data.TextLoader(dataPath).CreateFrom(useHeader: true));
+ pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
pipeline.Add(new StochasticDualCoordinateAscentClassifier());
- PredictionModel model = pipeline.Train();
- return model;
+ return pipeline.Train();
}
- private static IPredictor TrainSentimentCore()
+ [Benchmark]
+ public void TrainSentiment()
{
- var dataPath = s_sentimentDataPath;
using (var env = new TlcEnvironment(seed: 1))
{
// Pipeline
@@ -125,7 +87,7 @@ private static IPredictor TrainSentimentCore()
Type = DataKind.Text
}
}
- }, new MultiFileSource(dataPath));
+ }, new MultiFileSource(_sentimentDataPath));
var text = TextTransform.Create(env,
new TextTransform.Arguments()
@@ -145,7 +107,7 @@ private static IPredictor TrainSentimentCore()
WordFeatureExtractor = null,
}, loader);
- var trans = new WordEmbeddingsTransform(env,
+ var trans = new WordEmbeddingsTransform(env,
new WordEmbeddingsTransform.Arguments()
{
Column = new WordEmbeddingsTransform.Column[1]
@@ -162,32 +124,74 @@ private static IPredictor TrainSentimentCore()
// Train
var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 });
var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
- return trainer.Train(trainRoles);
+
+ var predicted = trainer.Train(trainRoles);
+ _consumer.Consume(predicted);
}
}
- public class IrisData
+ [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })]
+ public void SetupPredictBenchmarks()
{
- [Column("0")]
- public float Label;
+ _trainedModel = Train(_dataPath);
+ _consumer.Consume(_trainedModel.Predict(_example));
- [Column("1")]
- public float SepalLength;
+ var testData = new Data.TextLoader(_dataPath).CreateFrom(useHeader: true);
+ var evaluator = new ClassificationEvaluator();
+ _metrics = evaluator.Evaluate(_trainedModel, testData);
+
+ _batches = new IrisData[_batchSizes.Length][];
+ for (int i = 0; i < _batches.Length; i++)
+ {
+ var batch = new IrisData[_batchSizes[i]];
+ _batches[i] = batch;
+ for (int bi = 0; bi < batch.Length; bi++)
+ {
+ batch[bi] = _example;
+ }
+ }
+ }
- [Column("2")]
- public float SepalWidth;
+ [Benchmark]
+ public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels;
- [Column("3")]
- public float PetalLength;
+ [Benchmark]
+ public void PredictIrisBatchOf1() => Consume(_trainedModel.Predict(_batches[0]));
- [Column("4")]
- public float PetalWidth;
- }
+ [Benchmark]
+ public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1]));
+
+ [Benchmark]
+ public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2]));
- public class IrisPrediction
+ private void Consume(IEnumerable predictions)
{
- [ColumnName("Score")]
- public float[] PredictedLabels;
+ foreach (var prediction in predictions)
+ _consumer.Consume(prediction);
}
}
+
+ public class IrisData
+ {
+ [Column("0")]
+ public float Label;
+
+ [Column("1")]
+ public float SepalLength;
+
+ [Column("2")]
+ public float SepalWidth;
+
+ [Column("3")]
+ public float PetalLength;
+
+ [Column("4")]
+ public float PetalWidth;
+ }
+
+ public class IrisPrediction
+ {
+ [ColumnName("Score")]
+ public float[] PredictedLabels;
+ }
}