Skip to content

Commit f0f04ef

Browse files
Anipik authored and justinormont committed
Added Benchmark performance tests for wikidetoxData (#820)
* added some performance tests * added word embedding performance tests * Build Failure Corrected * feedback addressed * Casing, static fields removed and exception corrected * s_ removed * Names changed
1 parent f20a8e8 commit f0f04ef

File tree

5 files changed

+105
-0
lines changed

5 files changed

+105
-0
lines changed

build.proj

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@
7979
<TestFile Include="$(MSBuildThisFileDirectory)/test/data/external/winequality-white.csv"
8080
Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
8181
DestinationFile="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv" />
82+
83+
<!-- WikiDetox benchmark dataset: this TestFile item is only included when the IncludeBenchmarkData property is 'true'. -->
<TestFile Condition="'$(IncludeBenchmarkData)' == 'true'" Include="$(MSBuildThisFileDirectory)/test/data/external/WikiDetoxAnnotated160kRows.tsv"
84+
Url="http://aka.ms/tlc-resources/benchmarks/WikiDetoxAnnotated160kRows.tsv"
85+
DestinationFile="$(MSBuildThisFileDirectory)test/data/external/WikiDetoxAnnotated160kRows.tsv" />
8286
</ItemGroup>
8387

8488
<Target Name="DownloadExternalTestFiles" Inputs="@(TestFile)" Outputs="%(TestFile.DestinationFile)">

src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
using System.Runtime.InteropServices;
88

99
[assembly: InternalsVisibleTo("Microsoft.ML.TestFramework, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")]
10+
[assembly: InternalsVisibleTo("Microsoft.ML.Benchmarks, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")]
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using BenchmarkDotNet.Attributes;
6+
using Microsoft.ML.Runtime;
7+
using Microsoft.ML.Runtime.Data;
8+
using Microsoft.ML.Runtime.RunTests;
9+
using Microsoft.ML.Runtime.Tools;
10+
using System.IO;
11+
using System.Text;
12+
13+
namespace Microsoft.ML.Benchmarks
14+
{
15+
/// <summary>
/// A <see cref="TextWriter"/> used as the output writer of benchmark environments so that
/// nothing is printed to the console. This is required for the current version of BenchmarkDotNet.
/// </summary>
/// <remarks>
/// No Write overload is overridden here, so all output handling is left to the
/// <see cref="TextWriter"/> base class.
/// </remarks>
internal class EmptyWriter : TextWriter
{
    /// <summary>Shared instance; the type holds no state of its own.</summary>
    internal static readonly EmptyWriter Instance = new EmptyWriter();

    /// <summary>Always <c>null</c>: no encoding is reported for this writer.</summary>
    public override Encoding Encoding
    {
        get { return null; }
    }
}
22+
23+
public class BigramAndTrigramBenchmark
24+
{
25+
private string _dataPath_Wiki;
26+
private string _modelPath_Wiki;
27+
28+
[GlobalSetup(Targets = new string[] {
29+
nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron),
30+
nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass) })]
31+
public void SetupTrainingSpeedTests()
32+
{
33+
_dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename);
34+
35+
if (!File.Exists(_dataPath_Wiki))
36+
{
37+
throw new FileNotFoundException($"Could not find {_dataPath_Wiki} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root");
38+
}
39+
}
40+
41+
[GlobalSetup(Target = nameof(Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron))]
42+
public void SetupScoringSpeedTests()
43+
{
44+
SetupTrainingSpeedTests();
45+
_modelPath_Wiki = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.zip");
46+
string cmd = @"CV k=5 data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=OVA{p=AveragedPerceptron{iter=10}} out={" + _modelPath_Wiki + "}";
47+
using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
48+
{
49+
Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
50+
}
51+
}
52+
53+
[Benchmark]
54+
public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
55+
{
56+
string cmd = @"CV k=5 data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=OVA{p=AveragedPerceptron{iter=10}}";
57+
using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
58+
{
59+
Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
60+
}
61+
}
62+
63+
[Benchmark]
64+
public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass()
65+
{
66+
string cmd = @"CV k=5 data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=LightGBMMulticlass{}";
67+
using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
68+
{
69+
Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
70+
}
71+
}
72+
73+
[Benchmark]
74+
public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
75+
{
76+
// This benchmark is profiling bulk scoring speed and not training speed.
77+
string modelpath = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.fold000.zip");
78+
string cmd = @"Test data=" + _dataPath_Wiki + " in=" + modelpath;
79+
using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
80+
{
81+
Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
82+
}
83+
}
84+
}
85+
}

test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
<PackageReference Include="BenchmarkDotNet" Version="$(BenchmarkDotNetVersion)" />
1414
</ItemGroup>
1515
<ItemGroup>
16+
<ProjectReference Include="..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
17+
<ProjectReference Include="..\..\src\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
1618
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
1719
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
20+
<ProjectReference Include="..\..\test\Microsoft.ML.TestFramework\Microsoft.ML.TestFramework.csproj" />
1821
<ProjectReference Include="..\..\src\Microsoft.ML\Microsoft.ML.csproj" />
1922
</ItemGroup>
2023
<ItemGroup>
@@ -31,5 +34,10 @@
3134
<None Include="..\data\wikipedia-detox-250-line-data.tsv" Link="Input\wikipedia-detox-250-line-data.tsv">
3235
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
3336
</None>
37+
<!-- The WikiDetox benchmark data is linked and copied to the output directory only when the file exists on disk. -->
<None Condition="Exists('..\data\external\WikiDetoxAnnotated160kRows.tsv')"
38+
Include="..\data\external\WikiDetoxAnnotated160kRows.tsv"
39+
Link="external\WikiDetoxAnnotated160kRows.tsv">
40+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
41+
</None>
3442
</ItemGroup>
3543
</Project>

test/Microsoft.ML.TestFramework/Datasets.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,13 @@ public static class TestDatasets
160160
loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"
161161
};
162162

163+
// Dataset descriptor for the WikiDetox data; the train and test entries
// both reference the same file under the external data folder.
public static TestDataset WikiDetox = new TestDataset
164+
{
165+
name = "WikiDetox",
166+
trainFilename = "external/WikiDetoxAnnotated160kRows.tsv",
167+
testFilename = "external/WikiDetoxAnnotated160kRows.tsv"
168+
};
169+
163170
public static TestDataset winequality = new TestDataset
164171
{
165172
name = "wine",

0 commit comments

Comments
 (0)