From ab5d4732aa14aecc3173eeefc9886f5cf9daaae6 Mon Sep 17 00:00:00 2001 From: Anipik Date: Tue, 11 Sep 2018 14:15:33 -0700 Subject: [PATCH 1/8] added numeric ranking tests --- build.proj | 22 ++++- .../Microsoft.ML.Benchmarks.csproj | 16 ++++ .../Numeric/Ranking.cs | 91 +++++++++++++++++++ test/Microsoft.ML.TestFramework/Datasets.cs | 8 ++ 4 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs diff --git a/build.proj b/build.proj index 0a44ea7de7..fb13382f4e 100644 --- a/build.proj +++ b/build.proj @@ -78,11 +78,25 @@ - - + + + + + + + + + + diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index d78a5881fa..2de53eda9e 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -22,6 +22,7 @@ + @@ -39,5 +40,20 @@ Link="external\WikiDetoxAnnotated160kRows.tsv"> PreserveNewest + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs new file mode 100644 index 0000000000..b2410ce47a --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs @@ -0,0 +1,91 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using BenchmarkDotNet.Attributes; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.RunTests; +using Microsoft.ML.Runtime.Tools; +using System.IO; + +namespace Microsoft.ML.Benchmarks +{ + public class Ranking + { + public string _mslrWeb10k_Validate; + public string _mslrWeb10k_Train; + public string _mslrWeb10k_Test; + private string _modelPath_MSLR; + + [GlobalSetup(Targets = new string[] { + nameof(TrainTest_Multiclass_MSLRWeb10K_Ranking_FastTree), + nameof(TrainTest_Multiclass_MSLRWeb10K_Ranking_LightGBM) })] + public void SetupTrainingSpeedTests() + { + _mslrWeb10k_Validate = Path.GetFullPath(TestDatasets.MSLRWeb.validFilename); + _mslrWeb10k_Train = Path.GetFullPath(TestDatasets.MSLRWeb.trainFilename); + + if (!File.Exists(_mslrWeb10k_Validate)) + { + throw new FileNotFoundException($"Could not find {_mslrWeb10k_Validate} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); + } + + if (!File.Exists(_mslrWeb10k_Train)) + { + throw new FileNotFoundException($"Could not find {_mslrWeb10k_Train} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); + } + } + + [GlobalSetup(Target = nameof(Test_Multiclass_MSLRWeb10K_Ranking_FastTree))] + public void SetupScoringSpeedTests() + { + _mslrWeb10k_Test = Path.GetFullPath(TestDatasets.MSLRWeb.testFilename); + if (!File.Exists(_mslrWeb10k_Test)) + { + throw new FileNotFoundException($"Could not find {_mslrWeb10k_Test} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); + + } + + SetupTrainingSpeedTests(); + _modelPath_MSLR = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip"); + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train+ " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} 
xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=FastTreeRanking{} out={" + _modelPath_MSLR + "}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + { + Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + } + } + + [Benchmark] + public void TrainTest_Multiclass_MSLRWeb10K_Ranking_FastTree() + { + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=FastTreeRanking{}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + { + Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + } + } + + [Benchmark] + public void TrainTest_Multiclass_MSLRWeb10K_Ranking_LightGBM() + { + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=LightGBMRanking{}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + { + Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + } + } + + [Benchmark] + public void Test_Multiclass_MSLRWeb10K_Ranking_FastTree() + { + // This benchmark is profiling bulk scoring speed and not training speed. 
+ string modelpath = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip"); + string cmd = @"Test data=" + _mslrWeb10k_Test + " in="+ modelpath; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + { + Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + } + } + } +} diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index 6e327b5c66..11e6f272a4 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -167,6 +167,14 @@ public static class TestDatasets testFilename = "external/WikiDetoxAnnotated160kRows.tsv" }; + public static TestDataset MSLRWeb = new TestDataset + { + name = "MSLRWeb", + trainFilename = "external/MSLRWeb10KTrain3.6MRows.tsv", + validFilename = "external/MSLRWeb10KValidate1.2MRows.tsv", + testFilename = "external/MSLRWeb10KTest1.2MRows.tsv" + }; + public static TestDataset winequality = new TestDataset { name = "wine", From 1eb2b1f124f99f272ff74420c6745576b8012336 Mon Sep 17 00:00:00 2001 From: Anipik Date: Wed, 12 Sep 2018 14:51:06 -0700 Subject: [PATCH 2/8] feedback, indentation added --- build.proj | 24 +++---- build/ExternalBenchmarkDataFiles.props | 8 +++ test/Microsoft.ML.Benchmarks/Helpers.cs | 22 +++++++ .../Microsoft.ML.Benchmarks.csproj | 28 +++----- .../Numeric/Ranking.cs | 66 +++++++++++-------- .../Text/MultiClassClassification.cs | 60 ++++++++++++----- test/Microsoft.ML.TestFramework/Datasets.cs | 6 +- 7 files changed, 133 insertions(+), 81 deletions(-) create mode 100644 build/ExternalBenchmarkDataFiles.props create mode 100644 test/Microsoft.ML.Benchmarks/Helpers.cs diff --git a/build.proj b/build.proj index fb13382f4e..d88afc5b02 100644 --- a/build.proj +++ b/build.proj @@ -7,6 +7,7 @@ + @@ -81,22 +82,13 @@ DestinationFile="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv" /> - - - - - - - - + + + 
http://aka.ms/tlc-resources/benchmarks/%(Identity) + $(MSBuildThisFileDirectory)/test/data/external/%(Identity) + + + diff --git a/build/ExternalBenchmarkDataFiles.props b/build/ExternalBenchmarkDataFiles.props new file mode 100644 index 0000000000..615cd44862 --- /dev/null +++ b/build/ExternalBenchmarkDataFiles.props @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Helpers.cs b/test/Microsoft.ML.Benchmarks/Helpers.cs new file mode 100644 index 0000000000..55832fa13f --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/Helpers.cs @@ -0,0 +1,22 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using System.Text; + +namespace Microsoft.ML.Benchmarks +{ + internal class Helpers + { + public static string DatasetNotFound = "Could not find {0} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"; + } + + // Adding this class to not print anything to the console. 
+ // This is required for the current version of BenchmarkDotNet + internal class EmptyWriter : TextWriter + { + internal static readonly EmptyWriter Instance = new EmptyWriter(); + public override Encoding Encoding => null; + } +} diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 2de53eda9e..c5bd622890 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -1,4 +1,5 @@  + Exe 7.2 @@ -35,25 +36,14 @@ PreserveNewest - + + + external\%(Identity) + + + PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs index b2410ce47a..b1f93e0384 100644 --- a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs +++ b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -13,43 +13,45 @@ namespace Microsoft.ML.Benchmarks { public class Ranking { - public string _mslrWeb10k_Validate; - public string _mslrWeb10k_Train; - public string _mslrWeb10k_Test; + private string _mslrWeb10k_Validate; + private string _mslrWeb10k_Train; + private string _mslrWeb10k_Test; private string _modelPath_MSLR; [GlobalSetup(Targets = new string[] { - nameof(TrainTest_Multiclass_MSLRWeb10K_Ranking_FastTree), - nameof(TrainTest_Multiclass_MSLRWeb10K_Ranking_LightGBM) })] + nameof(TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking), + nameof(TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking) })] public void SetupTrainingSpeedTests() { _mslrWeb10k_Validate = Path.GetFullPath(TestDatasets.MSLRWeb.validFilename); _mslrWeb10k_Train = Path.GetFullPath(TestDatasets.MSLRWeb.trainFilename); if (!File.Exists(_mslrWeb10k_Validate)) - { - throw new FileNotFoundException($"Could not find {_mslrWeb10k_Validate} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); - } + throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Validate)); if (!File.Exists(_mslrWeb10k_Train)) - { - throw new FileNotFoundException($"Could not find {_mslrWeb10k_Train} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); - } + throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Train)); } - [GlobalSetup(Target = nameof(Test_Multiclass_MSLRWeb10K_Ranking_FastTree))] + [GlobalSetup(Target = nameof(Test_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking))] public void SetupScoringSpeedTests() { _mslrWeb10k_Test = Path.GetFullPath(TestDatasets.MSLRWeb.testFilename); if (!File.Exists(_mslrWeb10k_Test)) - { - throw new FileNotFoundException($"Could not find {_mslrWeb10k_Test} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the 
root"); - - } - + throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _mslrWeb10k_Test)); + SetupTrainingSpeedTests(); _modelPath_MSLR = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip"); - string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train+ " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=FastTreeRanking{} out={" + _modelPath_MSLR + "}"; + + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + + " eval=RankingEvaluator{t=10}" + + " data=" + _mslrWeb10k_Train + + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" + + " xf=HashTransform{col=GroupId}" + + " xf=NAHandleTransform{col=Features}" + + " tr=FastTreeRanking{}" + + " out={" + _modelPath_MSLR + "}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -57,9 +59,15 @@ public void SetupScoringSpeedTests() } [Benchmark] - public void TrainTest_Multiclass_MSLRWeb10K_Ranking_FastTree() + public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking() { - string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=FastTreeRanking{}"; + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + + " eval=RankingEvaluator{t=10}" + + " data=" + _mslrWeb10k_Train + + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" + + " xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features}" + + " tr=FastTreeRanking{}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: 
EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -67,9 +75,16 @@ public void TrainTest_Multiclass_MSLRWeb10K_Ranking_FastTree() } [Benchmark] - public void TrainTest_Multiclass_MSLRWeb10K_Ranking_LightGBM() + public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking() { - string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + " eval=RankingEvaluator{t=10} data=" + _mslrWeb10k_Train + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138} xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features} tr=LightGBMRanking{}"; + string cmd = @"TrainTest test=" + _mslrWeb10k_Validate + + " eval=RankingEvaluator{t=10}" + + " data=" + _mslrWeb10k_Train + + " loader=TextLoader{col=Label:R4:0 col=GroupId:TX:1 col=Features:R4:2-138}" + + " xf=HashTransform{col=GroupId}" + + " xf=NAHandleTransform{col=Features}" + + " tr=LightGBMRanking{}"; + using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -77,11 +92,10 @@ public void TrainTest_Multiclass_MSLRWeb10K_Ranking_LightGBM() } [Benchmark] - public void Test_Multiclass_MSLRWeb10K_Ranking_FastTree() + public void Test_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking() { // This benchmark is profiling bulk scoring speed and not training speed. 
- string modelpath = Path.Combine(Directory.GetCurrentDirectory(), @"FastTreeRankingModel.zip"); - string cmd = @"Test data=" + _mslrWeb10k_Test + " in="+ modelpath; + string cmd = @"Test data=" + _mslrWeb10k_Test + " in="+ _modelPath_MSLR; using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); diff --git a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs index 0a93b28c32..dbc997e769 100644 --- a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs +++ b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs @@ -8,18 +8,9 @@ using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using System.IO; -using System.Text; namespace Microsoft.ML.Benchmarks { - // Adding this class to not print anything to the console. - // This is required for the current version of BenchmarkDotNet - internal class EmptyWriter : TextWriter - { - internal static readonly EmptyWriter Instance = new EmptyWriter(); - public override Encoding Encoding => null; - } - public class MultiClassClassification { private string _dataPath_Wiki; @@ -35,9 +26,7 @@ public void SetupTrainingSpeedTests() _dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename); if (!File.Exists(_dataPath_Wiki)) - { - throw new FileNotFoundException($"Could not find {_dataPath_Wiki} Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root"); - } + throw new FileNotFoundException(string.Format(Helpers.DatasetNotFound, _dataPath_Wiki)); } [GlobalSetup(Target = nameof(Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron))] @@ -45,7 +34,15 @@ public void SetupScoringSpeedTests() { SetupTrainingSpeedTests(); _modelPath_Wiki = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.zip"); - string cmd = @"CV k=5 
data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=OVA{p=AveragedPerceptron{iter=10}} out={" + _modelPath_Wiki + "}"; + + string cmd = @"CV k=5 data=" + _dataPath_Wiki + + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4}" + + " xf=CategoricalTransform{col=ns}" + + " xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" + + " xf=Concat{col=Features:FeaturesText,logged_in,ns}" + + " tr=OVA{p=AveragedPerceptron{iter=10}}" + + " out={" + _modelPath_Wiki + "}"; + using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -55,7 +52,14 @@ public void SetupScoringSpeedTests() [Benchmark] public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron() { - string cmd = @"CV k=5 data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=OVA{p=AveragedPerceptron{iter=10}}"; + string cmd = @"CV k=5 data=" + _dataPath_Wiki + + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 
col=split:TX:7 col=year:R4:3 header=+}" + + " xf=Convert{col=logged_in type=R4}" + + " xf=CategoricalTransform{col=ns}" + + " xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" + + " xf=Concat{col=Features:FeaturesText,logged_in,ns}" + + " tr=OVA{p=AveragedPerceptron{iter=10}}"; + using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -65,7 +69,13 @@ public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron() [Benchmark] public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass() { - string cmd = @"CV k=5 data=" + _dataPath_Wiki + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=LightGBMMulticlass{}"; + string cmd = @"CV k=5 data=" + _dataPath_Wiki + + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+}" + + " xf=Convert{col=logged_in type=R4}" + + " xf=CategoricalTransform{col=ns}" + + " xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" + + " xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=LightGBMMulticlass{}"; + using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -87,7 +97,15 @@ public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron() [Benchmark] public void 
CV_Multiclass_WikiDetox_WordEmbeddings_OVAAveragedPerceptron() { - string cmd = @"CV tr=OVA{p=AveragedPerceptron{iter=10}} k=5 loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} data=" + _dataPath_Wiki + " xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment tokens=+ wordExtractor=NGramExtractorTransform{ngram=2}} xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D} xf=Concat{col=Features:FeaturesText,FeaturesWordEmbedding,logged_in,ns}"; + string cmd = @"CV k=5 data=" + _dataPath_Wiki + + " tr=OVA{p=AveragedPerceptron{iter=10}}" + + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+}" + + " xf=Convert{col=logged_in type=R4}" + + " xf=CategoricalTransform{col=ns}" + + " xf=TextTransform{col=FeaturesText:comment tokens=+ wordExtractor=NGramExtractorTransform{ngram=2}}" + + " xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D}" + + " xf=Concat{col=Features:FeaturesText,FeaturesWordEmbedding,logged_in,ns}"; + using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); @@ -97,7 +115,15 @@ public void CV_Multiclass_WikiDetox_WordEmbeddings_OVAAveragedPerceptron() [Benchmark] public void CV_Multiclass_WikiDetox_WordEmbeddings_SDCAMC() { - string cmd = @"CV tr=SDCAMC k=5 loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} data=" + _dataPath_Wiki + " xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} 
xf=TextTransform{col=FeaturesText:comment tokens=+ wordExtractor={} charExtractor={}} xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D} xf=Concat{col=Features:FeaturesWordEmbedding,logged_in,ns}"; + string cmd = @"CV k=5 data=" + _dataPath_Wiki + + " tr=SDCAMC" + + " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+}" + + " xf=Convert{col=logged_in type=R4}" + + " xf=CategoricalTransform{col=ns}" + + " xf=TextTransform{col=FeaturesText:comment tokens=+ wordExtractor={} charExtractor={}}" + + " xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D}" + + " xf=Concat{col=Features:FeaturesWordEmbedding,logged_in,ns}"; + using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index 11e6f272a4..4400bd32b4 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -170,9 +170,9 @@ public static class TestDatasets public static TestDataset MSLRWeb = new TestDataset { name = "MSLRWeb", - trainFilename = "external/MSLRWeb10KTrain3.6MRows.tsv", - validFilename = "external/MSLRWeb10KValidate1.2MRows.tsv", - testFilename = "external/MSLRWeb10KTest1.2MRows.tsv" + trainFilename = "external/MSLRWeb10KTrain720kRows.tsv", + validFilename = "external/MSLRWeb10KValidate240kRows.tsv", + testFilename = "external/MSLRWeb10KTest240kRows.tsv" }; public static TestDataset winequality = new TestDataset From 6ff46806de330bf8a2ac0abf21a7adbb5e5632bc Mon Sep 17 00:00:00 2001 From: Anipik Date: Thu, 13 Sep 2018 15:06:24 -0700 Subject: [PATCH 3/8] names change, url changes and warmcount 
changes --- build.proj | 8 +++--- build/ExternalBenchmarkDataFiles.props | 8 +++--- .../Microsoft.ML.Benchmarks.csproj | 10 ++++---- .../Numeric/Ranking.cs | 3 ++- test/data/README.md | 25 +++++++++++++++++++ 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/build.proj b/build.proj index d88afc5b02..c01c2356b6 100644 --- a/build.proj +++ b/build.proj @@ -83,12 +83,12 @@ - - http://aka.ms/tlc-resources/benchmarks/%(Identity) + + https://tlcresources.blob.core.windows.net/mslrweb10k/%(Identity) $(MSBuildThisFileDirectory)/test/data/external/%(Identity) - + - + diff --git a/build/ExternalBenchmarkDataFiles.props b/build/ExternalBenchmarkDataFiles.props index 615cd44862..ad3d350d60 100644 --- a/build/ExternalBenchmarkDataFiles.props +++ b/build/ExternalBenchmarkDataFiles.props @@ -1,8 +1,8 @@ - - - - + + + + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index c5bd622890..6bdece79a4 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -1,5 +1,5 @@  - + Exe 7.2 @@ -37,12 +37,12 @@ PreserveNewest - + external\%(Identity) - + - + PreserveNewest diff --git a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs index b1f93e0384..a3f53f4b32 100644 --- a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs +++ b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -11,6 +11,7 @@ namespace Microsoft.ML.Benchmarks { + [WarmupCount(8)] // It helps to reduce the standard deviation of these tests. 
public class Ranking { private string _mslrWeb10k_Validate; diff --git a/test/data/README.md b/test/data/README.md index ea9133e33e..b0d21d5268 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -46,6 +46,31 @@ Redistributing the dataset "taxi-fare-test.csv", "taxi-fare-train.csv" with attr > > The dataset is provided under terms provided by City of New York: https://opendata.cityofnewyork.us/overview/#termsofuse. +### MSLRWeb10k + +This dataset is originally from [Introducing LETOR 4.0 Datasets](http://arxiv.org/abs/1306.2597). +The dataset is under a CC-by 4.0 license. + +> @article{DBLP:journals/corr/QinL13, + +> author = {Tao Qin and Tie{-}Yan Liu}, + +> title = {Introducing {LETOR} 4.0 Datasets}, + +> journal = {CoRR}, + +> volume = {abs/1306.2597}, + +> year = {2013}, + +> url = {http://arxiv.org/abs/1306.2597}, + +> timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, + +> biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, + +> bibsource = {dblp computer science bibliography, http://dblp.org} + # Images ### Located in `images` folder From 599f7198fc0728f9cacd47c6514b6450c81b3955 Mon Sep 17 00:00:00 2001 From: Anipik Date: Mon, 17 Sep 2018 11:39:58 -0700 Subject: [PATCH 4/8] url corrected, https corrected and extra lines removed --- build.proj | 2 +- .../Microsoft.ML.Benchmarks.csproj | 2 +- test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs | 1 - test/data/README.md | 16 +++------------- 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/build.proj b/build.proj index c01c2356b6..26a68f7b40 100644 --- a/build.proj +++ b/build.proj @@ -84,7 +84,7 @@ - https://tlcresources.blob.core.windows.net/mslrweb10k/%(Identity) + https://aka.ms/tlc-resources/benchmarks/%(Identity) $(MSBuildThisFileDirectory)/test/data/external/%(Identity) diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index 6bdece79a4..44eaec093e 100644 --- 
a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -44,6 +44,6 @@ PreserveNewest - + \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs index a3f53f4b32..166ec98104 100644 --- a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs +++ b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs @@ -11,7 +11,6 @@ namespace Microsoft.ML.Benchmarks { - [WarmupCount(8)] // It helps to reduce the standard deviation of these tests. public class Ranking { private string _mslrWeb10k_Validate; diff --git a/test/data/README.md b/test/data/README.md index b0d21d5268..1cc15397c7 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -46,29 +46,19 @@ Redistributing the dataset "taxi-fare-test.csv", "taxi-fare-train.csv" with attr > > The dataset is provided under terms provided by City of New York: https://opendata.cityofnewyork.us/overview/#termsofuse. -### MSLRWeb10k +### MSLR-WEB10K, MSLR-WEB30K -This dataset is originally from [Introducing LETOR 4.0 Datasets](http://arxiv.org/abs/1306.2597). +This dataset is originally from [Introducing LETOR 4.0 Datasets](https://arxiv.org/abs/1306.2597). The dataset is under a CC-by 4.0 license. 
- > @article{DBLP:journals/corr/QinL13, - > author = {Tao Qin and Tie{-}Yan Liu}, - > title = {Introducing {LETOR} 4.0 Datasets}, - > journal = {CoRR}, - > volume = {abs/1306.2597}, - > year = {2013}, - -> url = {http://arxiv.org/abs/1306.2597}, - +> url = {https://arxiv.org/abs/1306.2597}, > timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, - > biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, - > bibsource = {dblp computer science bibliography, http://dblp.org} # Images From fe017020e592c7d344f38b0fb5b7d14eebce30e6 Mon Sep 17 00:00:00 2001 From: Anipik Date: Mon, 17 Sep 2018 15:42:44 -0700 Subject: [PATCH 5/8] tlc changed to console environment --- .../Numeric/Ranking.cs | 16 ++++++------- .../Text/MultiClassClassification.cs | 24 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs index 166ec98104..a8c008ab04 100644 --- a/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs +++ b/test/Microsoft.ML.Benchmarks/Numeric/Ranking.cs @@ -52,9 +52,9 @@ public void SetupScoringSpeedTests() " tr=FastTreeRanking{}" + " out={" + _modelPath_MSLR + "}"; - using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -68,9 +68,9 @@ public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking() " xf=HashTransform{col=GroupId} xf=NAHandleTransform{col=Features}" + " tr=FastTreeRanking{}"; - using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: 
MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -85,9 +85,9 @@ public void TrainTest_Ranking_MSLRWeb10K_RawNumericFeatures_LightGBMRanking() " xf=NAHandleTransform{col=Features}" + " tr=LightGBMRanking{}"; - using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -96,9 +96,9 @@ public void Test_Ranking_MSLRWeb10K_RawNumericFeatures_FastTreeRanking() { // This benchmark is profiling bulk scoring speed and not training speed. string cmd = @"Test data=" + _mslrWeb10k_Test + " in="+ _modelPath_MSLR; - using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } } diff --git a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs index dbc997e769..364a32f0bf 100644 --- a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs +++ b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs @@ -43,9 +43,9 @@ public void SetupScoringSpeedTests() " tr=OVA{p=AveragedPerceptron{iter=10}}" + " out={" + _modelPath_Wiki + "}"; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new 
ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -60,9 +60,9 @@ public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron() " xf=Concat{col=Features:FeaturesText,logged_in,ns}" + " tr=OVA{p=AveragedPerceptron{iter=10}}"; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -76,9 +76,9 @@ public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass() " xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}}" + " xf=Concat{col=Features:FeaturesText,logged_in,ns} tr=LightGBMMulticlass{}"; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -88,9 +88,9 @@ public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron() // This benchmark is profiling bulk scoring speed and not training speed. 
string modelpath = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.fold000.zip"); string cmd = @"Test data=" + _dataPath_Wiki + " in=" + modelpath; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -106,9 +106,9 @@ public void CV_Multiclass_WikiDetox_WordEmbeddings_OVAAveragedPerceptron() " xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D}" + " xf=Concat{col=Features:FeaturesText,FeaturesWordEmbedding,logged_in,ns}"; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } @@ -124,9 +124,9 @@ public void CV_Multiclass_WikiDetox_WordEmbeddings_SDCAMC() " xf=WordEmbeddingsTransform{col=FeaturesWordEmbedding:FeaturesText_TransformedText model=FastTextWikipedia300D}" + " xf=Concat{col=Features:FeaturesWordEmbedding,logged_in,ns}"; - using (var tlc = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) + using (var environment = new ConsoleEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance)) { - Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false); + Maml.MainCore(environment, cmd, alwaysPrintStacktrace: false); } } } From 638d0c8536d37a0918ccfbcf56a3781525ae18f9 Mon Sep 17 00:00:00 2001 From: Anipik Date: Mon, 17 Sep 
2018 22:19:24 -0700 Subject: [PATCH 6/8] https and closing brace added --- test/data/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/data/README.md b/test/data/README.md index 1cc15397c7..6dac9a17a9 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -58,8 +58,8 @@ The dataset is under a CC-by 4.0 license. > year = {2013}, > url = {https://arxiv.org/abs/1306.2597}, > timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, -> biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, -> bibsource = {dblp computer science bibliography, http://dblp.org} +> biburl = {https://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, +> bibsource = {dblp computer science bibliography, https://dblp.org}} # Images From 2616671dd862450018069dcdb441f1885cb12f72 Mon Sep 17 00:00:00 2001 From: Justin Ormont Date: Mon, 17 Sep 2018 22:39:54 -0700 Subject: [PATCH 7/8] Code block for MSLR-WEB10K/MSLR-WEB30K to format citation --- test/data/README.md | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/test/data/README.md b/test/data/README.md index 6dac9a17a9..165c928ba7 100644 --- a/test/data/README.md +++ b/test/data/README.md @@ -50,16 +50,20 @@ Redistributing the dataset "taxi-fare-test.csv", "taxi-fare-train.csv" with attr This dataset is originally from [Introducing LETOR 4.0 Datasets](https://arxiv.org/abs/1306.2597). The dataset is under a CC-by 4.0 license. 
-> @article{DBLP:journals/corr/QinL13, -> author = {Tao Qin and Tie{-}Yan Liu}, -> title = {Introducing {LETOR} 4.0 Datasets}, -> journal = {CoRR}, -> volume = {abs/1306.2597}, -> year = {2013}, -> url = {https://arxiv.org/abs/1306.2597}, -> timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, -> biburl = {https://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, -> bibsource = {dblp computer science bibliography, https://dblp.org}} +``` +@article{DBLP:journals/corr/QinL13, + author = {Tao Qin and + Tie{-}Yan Liu}, + title = {Introducing {LETOR} 4.0 Datasets}, + journal = {CoRR}, + volume = {abs/1306.2597}, + year = {2013}, + url = {https://arxiv.org/abs/1306.2597}, + timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, + biburl = {https://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` # Images From eb738d23c586d1213c35acea9137d3b1bbc1763a Mon Sep 17 00:00:00 2001 From: Justin Ormont Date: Wed, 19 Sep 2018 10:07:45 -0700 Subject: [PATCH 8/8] Add missing semicolon Missing semicolon is causing the build to fail: ``` 2018-09-19T04:41:39.2181028Z Datasets.cs(171,10): error CS1002: ; expected [/__w/3/s/test/Microsoft.ML.TestFramework/Microsoft.ML.TestFramework.csproj] 2018-09-19T04:41:39.7812509Z Microsoft.ML.StandardLearners -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.StandardLearners/netstandard2.0/Microsoft.ML.StandardLearners.dll 2018-09-19T04:41:40.7120753Z Microsoft.ML.HalLearners -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.HalLearners/netstandard2.0/Microsoft.ML.HalLearners.dll 2018-09-19T04:41:40.8804119Z Microsoft.ML.Ensemble -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.Ensemble/netstandard2.0/Microsoft.ML.Ensemble.dll 2018-09-19T04:41:40.9555420Z Microsoft.ML.LightGBM -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.LightGBM/netstandard2.0/Microsoft.ML.LightGBM.dll 2018-09-19T04:41:41.5610322Z Microsoft.ML.PipelineInference -> 
/__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.PipelineInference/netstandard2.0/Microsoft.ML.PipelineInference.dll 2018-09-19T04:41:42.4887819Z Microsoft.ML.Console -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.Console/netcoreapp2.0/MML.dll 2018-09-19T04:41:45.7637388Z Microsoft.ML.FSharp.Tests -> /__w/3/s/bin/AnyCPU.Debug/Microsoft.ML.FSharp.Tests/netcoreapp2.1/Microsoft.ML.FSharp.Tests.dll 2018-09-19T04:41:45.7926386Z /__w/3/s/dir.traversal.targets(25,5): error : Build failed. See earlier errors. [/__w/3/s/build.proj] 2018-09-19T04:41:45.8133725Z 2018-09-19T04:41:45.8152732Z Build FAILED. ``` --- test/Microsoft.ML.TestFramework/Datasets.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index 17ad5eada4..b9d0cad9a5 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -168,7 +168,7 @@ public static class TestDatasets trainFilename = "external/MSLRWeb10KTrain720kRows.tsv", validFilename = "external/MSLRWeb10KValidate240kRows.tsv", testFilename = "external/MSLRWeb10KTest240kRows.tsv" - } + }; public static TestDataset Sentiment = new TestDataset {