diff --git a/build/Dependencies.props b/build/Dependencies.props
index 0b6af3cdc9..e880e8c66b 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -10,7 +10,7 @@
 2.1.2.2
 0.0.0.5
 4.5.0
- 0.11.0
+ 0.11.1
 1.10.0
diff --git a/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs
new file mode 100644
index 0000000000..11b670cdcd
--- /dev/null
+++ b/test/Microsoft.ML.Benchmarks/Harness/Metrics.cs
@@ -0,0 +1,135 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Columns;
+using BenchmarkDotNet.Reports;
+using BenchmarkDotNet.Running;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Microsoft.ML.Benchmarks
+{
+    /// <summary>
+    /// base class for benchmarks that print extra metrics to the console once they have run
+    /// </summary>
+    public abstract class WithExtraMetrics
+    {
+        /// <summary>
+        /// the metrics that <see cref="ReportMetrics"/> prints, one console line per metric
+        /// </summary>
+        protected abstract IEnumerable<Metric> GetMetrics();
+
+        /// <summary>
+        /// this method is executed after running the benchmarks
+        /// we use it as a hack to simply print to console so ExtraMetricColumn can parse the output
+        /// </summary>
+        [GlobalCleanup]
+        public void ReportMetrics()
+        {
+            foreach (var metric in GetMetrics())
+            {
+                Console.WriteLine(metric.ToParsableString());
+            }
+        }
+    }
+
+    /// <summary>
+    /// summary column that shows every <see cref="Metric"/> found in the extra output of a benchmark
+    /// </summary>
+    public class ExtraMetricColumn : IColumn
+    {
+        public string ColumnName => "Extra Metric";
+        public string Id => nameof(ExtraMetricColumn);
+        public string Legend => "Value of the provided extra metric";
+        public bool IsNumeric => true;
+        public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true;
+        public bool IsAvailable(Summary summary) => true;
+        public bool AlwaysShow => true;
+        public ColumnCategory Category => ColumnCategory.Custom;
+        public int PriorityInCategory => 1;
+        public UnitType UnitType => UnitType.Dimensionless;
+        public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null);
+        public override string ToString() => ColumnName;
+
+        /// <summary>
+        /// joins all metrics parsed from the extra output of the benchmark with ", "
+        /// returns "-" when there is no report, not exactly one execute result, or no metric line at all
+        /// </summary>
+        public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style)
+        {
+            if (!summary.HasReport(benchmark))
+                return "-";
+
+            var results = summary[benchmark].ExecuteResults;
+            if (results.Count != 1)
+                return "-";
+
+            var result = results.Single();
+            var buffer = new StringBuilder();
+
+            foreach (var line in result.ExtraOutput)
+            {
+                if (Metric.TryParse(line, out Metric metric))
+                {
+                    if (buffer.Length > 0)
+                        buffer.Append(", ");
+
+                    buffer.Append(metric.ToColumnValue());
+                }
+            }
+
+            return buffer.Length > 0 ? buffer.ToString() : "-";
+        }
+    }
+
+    /// <summary>
+    /// a name/value pair that is written to and read back from a single console line of the form
+    /// "// Metric # name # value"
+    /// </summary>
+    public struct Metric
+    {
+        private const string Prefix = "// Metric";
+        private const char Separator = '#';
+
+        public string Name { get; }
+        public string Value { get; }
+
+        public Metric(string name, string value) : this()
+        {
+            Name = name;
+            Value = value;
+        }
+
+        public string ToColumnValue()
+            => $"{Name}: {Value}";
+
+        public string ToParsableString()
+            => $"{Prefix} {Separator} {Name} {Separator} {Value}";
+
+        /// <summary>
+        /// tries to parse a line produced by <see cref="ToParsableString"/>
+        /// </summary>
+        /// <returns>false for null lines, lines without the prefix and lines that miss the name or the value part</returns>
+        public static bool TryParse(string line, out Metric metric)
+        {
+            metric = default;
+
+            // ordinal comparison: the prefix is a machine-readable marker, not linguistic text
+            if (line == null || !line.StartsWith(Prefix, StringComparison.Ordinal))
+                return false;
+
+            // at most three parts, so a value that itself contains the separator is kept whole
+            var parts = line.Split(new[] { Separator }, 3);
+            if (parts.Length != 3)
+                return false; // starts like a metric but is malformed, e.g. "// Metrics collected"
+
+            metric = new Metric(parts[1].Trim(), parts[2].Trim());
+
+            return true;
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs
new file mode 100644
index 0000000000..7560efe562
--- /dev/null
+++ b/test/Microsoft.ML.Benchmarks/Harness/ProjectGenerator.cs
@@ -0,0 +1,54 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Extensions;
+using BenchmarkDotNet.Toolchains;
+using BenchmarkDotNet.Toolchains.CsProj;
+using System;
+using System.IO;
+using System.Linq;
+
+namespace Microsoft.ML.Benchmarks.Harness
+{
+    /// <summary>
+    /// to avoid side effects of benchmarks affecting each other, BenchmarkDotNet runs every benchmark in a standalone, dedicated process
+    /// however to do that it needs to be able to create, build and run a new executable
+    ///
+    /// the problem with ML.NET is that it has native dependencies, which are NOT copied by MSBuild to the output folder
+    /// in case where A has native dependency and B references A
+    ///
+    /// this is why this class exists: to copy the native dependencies to the folder with the .exe
+    /// </summary>
+    public class ProjectGenerator : CsProjGenerator
+    {
+        public ProjectGenerator(string targetFrameworkMoniker) : base(targetFrameworkMoniker, platform => platform.ToConfig(), null)
+        {
+        }
+
+        protected override void CopyAllRequiredFiles(ArtifactsPaths artifactsPaths)
+        {
+            base.CopyAllRequiredFiles(artifactsPaths);
+            CopyMissingNativeDependencies(artifactsPaths);
+        }
+
+        private void CopyMissingNativeDependencies(ArtifactsPaths artifactsPaths)
+        {
+            var folderWithAutogeneratedExe = Path.GetDirectoryName(artifactsPaths.ExecutablePath);
+            var folderWithNativeDependencies = Path.GetDirectoryName(typeof(ProjectGenerator).Assembly.Location);
+
+            // EnumerateFiles returns full paths, so match on the file name only: a parent folder named e.g. "native-build" must not select every file
+            foreach (var nativeDependency in Directory
+                .EnumerateFiles(folderWithNativeDependencies)
+                .Where(filePath => ContainsWithIgnoreCase(Path.GetFileName(filePath), "native")))
+            {
+                File.Copy(
+                    sourceFileName: nativeDependency,
+                    destFileName: Path.Combine(folderWithAutogeneratedExe, Path.GetFileName(nativeDependency)),
+                    overwrite: true);
+            }
+        }
+
+        private static bool ContainsWithIgnoreCase(string text, string word) => text != null && text.IndexOf(word, StringComparison.OrdinalIgnoreCase) >= 0;
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs 
b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index 4c269e05fc..f96a5d8803 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -4,6 +4,7 @@ using BenchmarkDotNet.Attributes; using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Internal.Calibration; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.KMeans; @@ -13,21 +14,11 @@ namespace Microsoft.ML.Benchmarks { public class KMeansAndLogisticRegressionBench { - private static string s_dataPath; + private readonly string _dataPath = Program.GetInvariantCultureDataPath("adult.train"); [Benchmark] - public IPredictor TrainKMeansAndLR() => TrainKMeansAndLRCore(); - - [GlobalSetup] - public void Setup() + public ParameterMixingCalibratedPredictor TrainKMeansAndLR() { - s_dataPath = Program.GetDataPath("adult.train"); - } - - private static IPredictor TrainKMeansAndLRCore() - { - string dataPath = s_dataPath; - using (var env = new TlcEnvironment(seed: 1)) { // Pipeline @@ -53,7 +44,7 @@ private static IPredictor TrainKMeansAndLRCore() new TextLoader.Range() { Min = 10, Max = 12 } }) } - }, new MultiFileSource(dataPath)); + }, new MultiFileSource(_dataPath)); IDataTransform trans = CategoricalTransform.Create(env, new CategoricalTransform.Arguments { @@ -83,4 +74,4 @@ private static IPredictor TrainKMeansAndLRCore() } } } -} +} \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 5a9f3e7467..dfa673ea82 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -20,4 +20,16 @@ + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Program.cs 
b/test/Microsoft.ML.Benchmarks/Program.cs index 0b4e9edc52..5396e17b7b 100644 --- a/test/Microsoft.ML.Benchmarks/Program.cs +++ b/test/Microsoft.ML.Benchmarks/Program.cs @@ -6,11 +6,13 @@ using BenchmarkDotNet.Diagnosers; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Running; -using BenchmarkDotNet.Columns; -using BenchmarkDotNet.Reports; -using BenchmarkDotNet.Toolchains.InProcess; +using BenchmarkDotNet.Toolchains; +using BenchmarkDotNet.Toolchains.CsProj; +using BenchmarkDotNet.Toolchains.DotNetCli; +using Microsoft.ML.Benchmarks.Harness; +using System.Globalization; using System.IO; -using Microsoft.ML.Models; +using System.Threading; namespace Microsoft.ML.Benchmarks { @@ -28,52 +30,33 @@ static void Main(string[] args) private static IConfig CreateCustomConfig() => DefaultConfig.Instance .With(Job.Default + .WithWarmupCount(1) // for our time consuming benchmarks 1 warmup iteration is enough .WithMaxIterationCount(20) - .With(InProcessToolchain.Instance)) - .With(new ClassificationMetricsColumn("AccuracyMacro", "Macro-average accuracy of the model")) + .With(CreateToolchain())) + .With(new ExtraMetricColumn()) .With(MemoryDiagnoser.Default); - internal static string GetDataPath(string name) - => Path.GetFullPath(Path.Combine(_dataRoot, name)); - - static readonly string _dataRoot; - static Program() + /// + /// we need our own toolchain because MSBuild by default does not copy recursive native dependencies to the output + /// + private static IToolchain CreateToolchain() { - var currentAssemblyLocation = new FileInfo(typeof(Program).Assembly.Location); - var rootDir = currentAssemblyLocation.Directory.Parent.Parent.Parent.Parent.FullName; - _dataRoot = Path.Combine(rootDir, "test", "data"); + var csProj = CsProjCoreToolchain.Current.Value; + var tfm = NetCoreAppSettings.Current.Value.TargetFrameworkMoniker; + + return new Toolchain( + tfm, + new ProjectGenerator(tfm), + csProj.Builder, + csProj.Executor); } - } - - public class ClassificationMetricsColumn 
: IColumn - { - private readonly string _metricName; - private readonly string _legend; - public ClassificationMetricsColumn(string metricName, string legend) + internal static string GetInvariantCultureDataPath(string name) { - _metricName = metricName; - _legend = legend; - } - - public string ColumnName => _metricName; - public string Id => _metricName; - public string Legend => _legend; - public bool IsNumeric => true; - public bool IsDefault(Summary summary, BenchmarkCase benchmark) => true; - public bool IsAvailable(Summary summary) => true; - public bool AlwaysShow => true; - public ColumnCategory Category => ColumnCategory.Custom; - public int PriorityInCategory => 1; - public UnitType UnitType => UnitType.Dimensionless; + // enforce Neutral Language as "en-us" because the input data files use dot as decimal separator (and it fails for cultures with ",") + Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture; - public string GetValue(Summary summary, BenchmarkCase benchmark, ISummaryStyle style) - { - var property = typeof(ClassificationMetrics).GetProperty(_metricName); - return property.GetValue(StochasticDualCoordinateAscentClassifierBench.s_metrics).ToString(); + return Path.Combine(Path.GetDirectoryName(typeof(Program).Assembly.Location), "Input", name); } - public string GetValue(Summary summary, BenchmarkCase benchmark) => GetValue(summary, benchmark, null); - - public override string ToString() => ColumnName; } } diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 6e0b856dbd..b0c9235198 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -11,22 +11,19 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; -using System; using 
System.Collections.Generic; +using System.Globalization; namespace Microsoft.ML.Benchmarks { - public class StochasticDualCoordinateAscentClassifierBench + public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics { - internal static ClassificationMetrics s_metrics; - private static PredictionModel s_trainedModel; - private static string s_dataPath; - private static string s_sentimentDataPath; - private static IrisData[][] s_batches; - private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 }; - private readonly Random r = new Random(0); - private readonly Consumer _consumer = new Consumer(); - private static readonly IrisData s_example = new IrisData() + private readonly string _dataPath = Program.GetInvariantCultureDataPath("iris.txt"); + private readonly string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv"); + private readonly Consumer _consumer = new Consumer(); // BenchmarkDotNet utility type used to prevent dead code elimination + + private readonly int[] _batchSizes = new int[] { 1, 2, 5 }; + private readonly IrisData _example = new IrisData() { SepalLength = 3.3f, SepalWidth = 1.6f, @@ -34,71 +31,36 @@ public class StochasticDualCoordinateAscentClassifierBench PetalWidth = 5.1f, }; - [GlobalSetup] - public void Setup() - { - s_dataPath = Program.GetDataPath("iris.txt"); - s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv"); - s_trainedModel = TrainCore(); - IrisPrediction prediction = s_trainedModel.Predict(s_example); - - var testData = new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true); - var evaluator = new ClassificationEvaluator(); - s_metrics = evaluator.Evaluate(s_trainedModel, testData); + private PredictionModel _trainedModel; + private IrisData[][] _batches; + private ClassificationMetrics _metrics; - s_batches = new IrisData[s_batchSizes.Length][]; - for (int i = 0; i < s_batches.Length; i++) - { - var batch = new 
IrisData[s_batchSizes[i]]; - s_batches[i] = batch; - for (int bi = 0; bi < batch.Length; bi++) - { - batch[bi] = s_example; - } - } + protected override IEnumerable GetMetrics() + { + if (_metrics != null) + yield return new Metric( + nameof(ClassificationMetrics.AccuracyMacro), + _metrics.AccuracyMacro.ToString("0.##", CultureInfo.InvariantCulture)); } [Benchmark] - public PredictionModel TrainIris() => TrainCore(); - - [Benchmark] - public float[] PredictIris() => s_trainedModel.Predict(s_example).PredictedLabels; - - [Benchmark] - public void PredictIrisBatchOf1() => Consume(s_trainedModel.Predict(s_batches[0])); + public PredictionModel TrainIris() => Train(_dataPath); - [Benchmark] - public void PredictIrisBatchOf2() => Consume(s_trainedModel.Predict(s_batches[1])); - - [Benchmark] - public void PredictIrisBatchOf5() => Consume(s_trainedModel.Predict(s_batches[2])); - - [Benchmark] - public IPredictor TrainSentiment() => TrainSentimentCore(); - - private void Consume(IEnumerable predictions) - { - foreach (var prediction in predictions) - _consumer.Consume(prediction); - } - - private static PredictionModel TrainCore() + private PredictionModel Train(string dataPath) { var pipeline = new LearningPipeline(); - pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom(useHeader: true)); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); + pipeline.Add(new Data.TextLoader(dataPath).CreateFrom(useHeader: true)); + pipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")); pipeline.Add(new StochasticDualCoordinateAscentClassifier()); - PredictionModel model = pipeline.Train(); - return model; + return pipeline.Train(); } - private static IPredictor TrainSentimentCore() + [Benchmark] + public void TrainSentiment() { - var dataPath = s_sentimentDataPath; using (var env = new TlcEnvironment(seed: 1)) { // Pipeline @@ -125,7 +87,7 @@ 
private static IPredictor TrainSentimentCore() Type = DataKind.Text } } - }, new MultiFileSource(dataPath)); + }, new MultiFileSource(_sentimentDataPath)); var text = TextTransform.Create(env, new TextTransform.Arguments() @@ -145,7 +107,7 @@ private static IPredictor TrainSentimentCore() WordFeatureExtractor = null, }, loader); - var trans = new WordEmbeddingsTransform(env, + var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments() { Column = new WordEmbeddingsTransform.Column[1] @@ -162,32 +124,74 @@ private static IPredictor TrainSentimentCore() // Train var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); - return trainer.Train(trainRoles); + + var predicted = trainer.Train(trainRoles); + _consumer.Consume(predicted); } } - public class IrisData + [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })] + public void SetupPredictBenchmarks() { - [Column("0")] - public float Label; + _trainedModel = Train(_dataPath); + _consumer.Consume(_trainedModel.Predict(_example)); - [Column("1")] - public float SepalLength; + var testData = new Data.TextLoader(_dataPath).CreateFrom(useHeader: true); + var evaluator = new ClassificationEvaluator(); + _metrics = evaluator.Evaluate(_trainedModel, testData); + + _batches = new IrisData[_batchSizes.Length][]; + for (int i = 0; i < _batches.Length; i++) + { + var batch = new IrisData[_batchSizes[i]]; + _batches[i] = batch; + for (int bi = 0; bi < batch.Length; bi++) + { + batch[bi] = _example; + } + } + } - [Column("2")] - public float SepalWidth; + [Benchmark] + public float[] PredictIris() => _trainedModel.Predict(_example).PredictedLabels; - [Column("3")] - public float PetalLength; + [Benchmark] + public void PredictIrisBatchOf1() => 
Consume(_trainedModel.Predict(_batches[0])); - [Column("4")] - public float PetalWidth; - } + [Benchmark] + public void PredictIrisBatchOf2() => Consume(_trainedModel.Predict(_batches[1])); + + [Benchmark] + public void PredictIrisBatchOf5() => Consume(_trainedModel.Predict(_batches[2])); - public class IrisPrediction + private void Consume(IEnumerable predictions) { - [ColumnName("Score")] - public float[] PredictedLabels; + foreach (var prediction in predictions) + _consumer.Consume(prediction); } } + + public class IrisData + { + [Column("0")] + public float Label; + + [Column("1")] + public float SepalLength; + + [Column("2")] + public float SepalWidth; + + [Column("3")] + public float PetalLength; + + [Column("4")] + public float PetalWidth; + } + + public class IrisPrediction + { + [ColumnName("Score")] + public float[] PredictedLabels; + } }