From 59d1a080e17273afbf1d8318e9e0fffba848d74e Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 7 Feb 2019 18:51:31 -0800 Subject: [PATCH 01/14] Lockdown of Microsoft.ML.LightGBM public surface. --- .../LightGBM/LightGBMBinaryClassification.cs | 103 ++++++++++++++++++ .../LightGBMMulticlassClassification.cs | 81 ++++++++++++++ .../Dynamic/LightGBM/LightGBMRegression.cs | 71 ++++++++++++ .../LightGbmArguments.cs | 3 +- src/Microsoft.ML.LightGBM/LightGbmCatalog.cs | 21 ++++ .../LightGbmRankingTrainer.cs | 6 +- .../Parallel/IParallel.cs | 4 +- .../Parallel/SingleTrainer.cs | 16 +-- 8 files changed, 291 insertions(+), 14 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs new file mode 100644 index 0000000000..2df1ec314d --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs @@ -0,0 +1,103 @@ +using System; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Categorical; + +namespace Microsoft.ML.Samples.Dynamic.LightGBM +{ + public class LightGbmBinaryClassification + { + public static void LightGbmBinaryClassificationExample() + { + // Downloading a classification dataset from github.com/dotnet/machinelearning. + // It will be stored in the same path as the executable + string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); + + // Data Preview + // 1. Column: age (numeric) + // 2. Column: workclass (text/categorical) + // 3. Column: fnlwgt (numeric) + // 4. Column: education (text/categorical) + // 5. Column: education-num (numeric) + // 6. Column: marital-status (text/categorical) + // 7. Column: occupation (text/categorical) + // 8. Column: relationship (text/categorical) + // 9. Column: ethnicity (text/categorical) + // 10. Column: sex (text/categorical) + // 11. Column: capital-gain (numeric) + // 12. Column: capital-loss (numeric) + // 13. Column: hours-per-week (numeric) + // 14. Column: native-country (text/categorical) + // 15. Column: Column [Label]: IsOver50K (boolean) + + // Creating the ML.Net IHostEnvironment object, needed for the pipeline + var mlContext = new MLContext(); + + var reader = mlContext.Data.ReadFromTextFile(dataFilePath, new TextLoader.Arguments + { + Separators = new[] { ',' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("age", DataKind.R4, 0), + new TextLoader.Column("workclass", DataKind.Text, 1), + new TextLoader.Column("fnlwgt", DataKind.R4, 2), + new TextLoader.Column("education", DataKind.Text, 3), + new TextLoader.Column("education-num", DataKind.R4, 4), + new TextLoader.Column("marital-status", DataKind.Text, 5), + new TextLoader.Column("occupation", DataKind.Text, 6), + new TextLoader.Column("relationship", DataKind.Text, 7), + new TextLoader.Column("ethnicity", DataKind.Text, 8), + new TextLoader.Column("sex", DataKind.Text, 9), + new TextLoader.Column("capital-gain", DataKind.R4, 10), + new TextLoader.Column("capital-loss", DataKind.R4, 11), + new TextLoader.Column("hours-per-week", DataKind.R4, 12), + new TextLoader.Column("native-country", DataKind.Text, 13), + new TextLoader.Column("Label", DataKind.Bool, 14) + } + }); + + // Read the data, and leave 10% out, so we can use them for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(reader, testFraction: 0.1); + + // Create the Estimator + var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] + { + new OneHotEncodingEstimator.ColumnInfo("marital-status"), + new OneHotEncodingEstimator.ColumnInfo("occupation"), + new OneHotEncodingEstimator.ColumnInfo("relationship"), + new OneHotEncodingEstimator.ColumnInfo("ethnicity"), + new OneHotEncodingEstimator.ColumnInfo("sex"), + new OneHotEncodingEstimator.ColumnInfo("native-country"), + }) + .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) + .Append(mlContext.Transforms.Concatenate("Features", + "age", + "education-num", + "marital-status", + "relationship", + "ethnicity", + "sex", + "hours-per-week", + "native-country")) + .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.BinaryClassification.Trainers.LightGbm()); + + // Fit this Pipeline to the Training Data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); + + Console.WriteLine($"Accuracy: {metrics.Accuracy}"); // 0.84 + Console.WriteLine($"AUC: {metrics.Auc}"); // 0.88 + Console.WriteLine($"F1 Score: {metrics.F1Score}"); // 0.62 + + Console.WriteLine($"Negative Precision: {metrics.NegativePrecision}"); // 0.88 + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall}"); // 0.91 + Console.WriteLine($"Positive Precision: {metrics.PositivePrecision}"); // 0.67 + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall}"); // 0.58 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs new file mode 100644 index 0000000000..36d2467a71 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs @@ -0,0 +1,81 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; + +namespace Microsoft.ML.Samples.Dynamic.LightGBM +{ + class LightGbmMulticlassClassification + { + public static void LightGbmMulticlassClassificationExample() + { + // Create a general context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Create in-memory examples as C# native class. + var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); + + // Convert native C# class to IDataView, a consumble format to ML.NET functions. + var dataView = mlContext.Data.ReadFromEnumerable(examples); + + // Create a pipeline. + // - Convert the string labels into key types. + // - Apply LightGbm multiclass trainer + var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") + .Append(mlContext.MulticlassClassification.Trainers.LightGbm(labelColumn: "LabelIndex")) + .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel")) + .Append(mlContext.Transforms.CopyColumns("Scores", "Score")); + + // Split the static-typed data into training and test sets. Only training set is used in fitting + // the created pipeline. Metrics are computed on the test. + var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Do prediction on the test set. + var dataWithPredictions = model.Transform(testingData); + + // Evaluate the trained model is the test set. + var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); + + // Check if metrics are resonable. + Console.WriteLine("Macro accuracy: {0}, Micro accuracy: {1}.", 0.863482146891263, 0.86309523809523814); + + // Convert prediction in ML.NET format to native C# class. + var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); + + // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index + // (e.g., 1) to its actual label (e.g., "AA"). The call to "AsDynamic" converts our statically-typed pipeline into + // a dynamically-typed one only for extracting metadata. In the future, metadata in statically-typed pipeline should + // be accessible without dynamically-typed things. + var schema = dataWithPredictions.Schema; + + // Retrieve the mapping from labels to label indexes. + var labelBuffer = new VBuffer>(); + schema[nameof(DatasetUtils.MulticlassClassificationExample.PredictedLabelIndex)].Metadata.GetValue("KeyValues", ref labelBuffer); + // nativeLabels is { "AA" , "BB", "CC", "DD" } + var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex. + + + // Show prediction result for the 3rd example. + var nativePrediction = nativePredictions[2]; + // Console output: + // Our predicted label to this example is "AA" with probability 0.922597349. + Console.WriteLine("Our predicted label to this example is {0} with probability {1}", + nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1], + nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]); + + var expectedProbabilities = new float[] { 0.922597349f, 0.07508608f, 0.00221699756f, 9.95488E-05f }; + // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. + // Console output: + // The probability of being class "AA" is 0.922597349. + // The probability of being class "BB" is 0.07508608. + // The probability of being class "CC" is 0.00221699756. + // The probability of being class "DD" is 9.95488E-05. + for (int i = 0; i < labelBuffer.Length; ++i) + Console.WriteLine("The probability of being class {0} is {1}.", nativeLabels[i], nativePrediction.Scores[i]); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs new file mode 100644 index 0000000000..669c0a40fb --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs @@ -0,0 +1,71 @@ +using System; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Samples.Dynamic.LightGBM +{ + class LightGbmRegression + { + public static void LightGbmRegressionExample() + { + // Downloading a regression dataset from github.com/dotnet/machinelearning + // this will create a housing.txt file in the filsystem this code will run + // you can open the file to see the data. + string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Creating a data reader, based on the format of the data + // The data is tab separated with all numeric columns. + // The first column being the label and rest are numeric features + // Here only seven numeric columns are used as features + var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments + { + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("Features", DataKind.R4, 1, 6) + } + }); + + //////////////////// Data Preview //////////////////// + // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s + // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 + // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 + // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 + + var (trainData, testData) = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); + + // Create the estimator, here we only need LightGbm trainer + // as data is already processed in a form consumable by the trainer + var pipeline = mlContext.Regression.Trainers.LightGbm( + numLeaves: 4, + minDataPerLeaf: 6, + learningRate: 0.001); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Check the weights that the model learned + VBuffer weights = default; + model.Model.GetFeatureWeights(ref weights); + + var weightsValues = weights.GetValues(); + Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 + Console.WriteLine($"weight 1 - {weightsValues[5]}"); // RoomsPerDwelling (weight 1) = 1 + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.Regression.Evaluate(dataWithPredictions); + + Console.WriteLine($"L1 - {metrics.L1}"); // 4.9669731 + Console.WriteLine($"L2 - {metrics.L2}"); // 51.37296 + Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 51.37296 + Console.WriteLine($"RMS - {metrics.Rms}"); // 7.167493 + Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.079478 + } + } +} diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index dbfa868847..da8905b0ca 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -24,12 +24,13 @@ namespace Microsoft.ML.LightGBM { - public delegate void SignatureLightGBMBooster(); + internal delegate void SignatureLightGBMBooster(); [TlcModule.ComponentKind("BoosterParameterFunction")] public interface ISupportBoosterParameterFactory : IComponentFactory { } + public interface IBoosterParameter { void UpdateParameters(Dictionary res); diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index 392f4e084f..5153f0897d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -24,6 +24,13 @@ public static class LightGbmExtensions /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// + /// + /// + /// + /// public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.RegressionTrainers catalog, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, @@ -62,6 +69,13 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// + /// + /// + /// + /// public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, @@ -140,6 +154,13 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// + /// + /// + /// + /// public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 04e78e7a4d..bcde2c58ad 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -76,9 +76,9 @@ private static LightGbmRankingModelParameters Create(IHostEnvironment env, Model /// public sealed class LightGbmRankingTrainer : LightGbmTrainerBase, LightGbmRankingModelParameters> { - public const string UserName = "LightGBM Ranking"; - public const string LoadNameValue = "LightGBMRanking"; - public const string ShortName = "LightGBMRank"; + internal const string UserName = "LightGBM Ranking"; + internal const string LoadNameValue = "LightGBMRanking"; + internal const string ShortName = "LightGBMRank"; public override PredictionKind PredictionKind => PredictionKind.Ranking; diff --git a/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs b/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs index 8f3f005741..522632a619 100644 --- a/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs +++ b/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs @@ -12,12 +12,12 @@ namespace Microsoft.ML.LightGBM /// /// Signature of LightGBM IAllreduce /// - public delegate void SignatureParallelTrainer(); + internal delegate void SignatureParallelTrainer(); /// /// Reduce function define in LightGBM Cpp side /// - public unsafe delegate void ReduceFunction(byte* src, byte* output, int typeSize, int arraySize); + internal unsafe delegate void ReduceFunction(byte* src, byte* output, int typeSize, int arraySize); /// /// Definition of ReduceScatter funtion diff --git a/src/Microsoft.ML.LightGBM/Parallel/SingleTrainer.cs b/src/Microsoft.ML.LightGBM/Parallel/SingleTrainer.cs index 8041fb3d6f..096e2277ef 100644 --- a/src/Microsoft.ML.LightGBM/Parallel/SingleTrainer.cs +++ b/src/Microsoft.ML.LightGBM/Parallel/SingleTrainer.cs @@ -13,41 +13,41 @@ namespace Microsoft.ML.LightGBM { - public sealed class SingleTrainer : IParallel + internal sealed class SingleTrainer : IParallel { - public AllgatherFunction GetAllgatherFunction() + AllgatherFunction IParallel.GetAllgatherFunction() { return null; } - public ReduceScatterFunction GetReduceScatterFunction() + ReduceScatterFunction IParallel.GetReduceScatterFunction() { return null; } - public int NumMachines() + int IParallel.NumMachines() { return 1; } - public string ParallelType() + string IParallel.ParallelType() { return "serial"; } - public int Rank() + int IParallel.Rank() { return 0; } - public Dictionary AdditionalParams() + Dictionary IParallel.AdditionalParams() { return null; } } [TlcModule.Component(Name = "Single", Desc = "Single node machine learning process.")] - public sealed class SingleTrainerFactory : ISupportParallel + internal sealed class SingleTrainerFactory : ISupportParallel { public IParallel CreateComponent(IHostEnvironment env) => new SingleTrainer(); } From a6c3357eb280ce95ddd65f21f6d2dc5ce801bc36 Mon Sep 17 00:00:00 2001 From: zeahmed Date: Mon, 11 Feb 2019 16:23:32 -0800 Subject: [PATCH 02/14] Addressed reviewers' comments. --- .../LightGBM/LightGBMBinaryClassification.cs | 103 ------------------ .../LightGBMBinaryClassification.cs | 62 +++++++++++ .../LightGBMMulticlassClassification.cs | 34 +++--- .../Regression}/LightGBMRegression.cs | 9 +- .../LightGbmStaticExtensions.cs | 6 +- .../LightGbmArguments.cs | 29 ++--- .../Parallel/IParallel.cs | 8 +- src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 21 ++++ .../SamplesDatasetUtils.cs | 32 ++++++ 9 files changed, 161 insertions(+), 143 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs rename docs/samples/Microsoft.ML.Samples/Dynamic/{LightGBM => Trainers/MulticlassClassification}/LightGBMMulticlassClassification.cs (72%) rename docs/samples/Microsoft.ML.Samples/Dynamic/{LightGBM => Trainers/Regression}/LightGBMRegression.cs (92%) create mode 100644 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs deleted file mode 100644 index 2df1ec314d..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMBinaryClassification.cs +++ /dev/null @@ -1,103 +0,0 @@ -using System; -using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Categorical; - -namespace Microsoft.ML.Samples.Dynamic.LightGBM -{ - public class LightGbmBinaryClassification - { - public static void LightGbmBinaryClassificationExample() - { - // Downloading a classification dataset from github.com/dotnet/machinelearning. - // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); - - // Data Preview - // 1. Column: age (numeric) - // 2. Column: workclass (text/categorical) - // 3. Column: fnlwgt (numeric) - // 4. Column: education (text/categorical) - // 5. Column: education-num (numeric) - // 6. Column: marital-status (text/categorical) - // 7. Column: occupation (text/categorical) - // 8. Column: relationship (text/categorical) - // 9. Column: ethnicity (text/categorical) - // 10. Column: sex (text/categorical) - // 11. Column: capital-gain (numeric) - // 12. Column: capital-loss (numeric) - // 13. Column: hours-per-week (numeric) - // 14. Column: native-country (text/categorical) - // 15. Column: Column [Label]: IsOver50K (boolean) - - // Creating the ML.Net IHostEnvironment object, needed for the pipeline - var mlContext = new MLContext(); - - var reader = mlContext.Data.ReadFromTextFile(dataFilePath, new TextLoader.Arguments - { - Separators = new[] { ',' }, - HasHeader = true, - Columns = new[] - { - new TextLoader.Column("age", DataKind.R4, 0), - new TextLoader.Column("workclass", DataKind.Text, 1), - new TextLoader.Column("fnlwgt", DataKind.R4, 2), - new TextLoader.Column("education", DataKind.Text, 3), - new TextLoader.Column("education-num", DataKind.R4, 4), - new TextLoader.Column("marital-status", DataKind.Text, 5), - new TextLoader.Column("occupation", DataKind.Text, 6), - new TextLoader.Column("relationship", DataKind.Text, 7), - new TextLoader.Column("ethnicity", DataKind.Text, 8), - new TextLoader.Column("sex", DataKind.Text, 9), - new TextLoader.Column("capital-gain", DataKind.R4, 10), - new TextLoader.Column("capital-loss", DataKind.R4, 11), - new TextLoader.Column("hours-per-week", DataKind.R4, 12), - new TextLoader.Column("native-country", DataKind.Text, 13), - new TextLoader.Column("Label", DataKind.Bool, 14) - } - }); - - // Read the data, and leave 10% out, so we can use them for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(reader, testFraction: 0.1); - - // Create the Estimator - var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] - { - new OneHotEncodingEstimator.ColumnInfo("marital-status"), - new OneHotEncodingEstimator.ColumnInfo("occupation"), - new OneHotEncodingEstimator.ColumnInfo("relationship"), - new OneHotEncodingEstimator.ColumnInfo("ethnicity"), - new OneHotEncodingEstimator.ColumnInfo("sex"), - new OneHotEncodingEstimator.ColumnInfo("native-country"), - }) - .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) - .Append(mlContext.Transforms.Concatenate("Features", - "age", - "education-num", - "marital-status", - "relationship", - "ethnicity", - "sex", - "hours-per-week", - "native-country")) - .Append(mlContext.Transforms.Normalize("Features")) - .Append(mlContext.BinaryClassification.Trainers.LightGbm()); - - // Fit this Pipeline to the Training Data - var model = pipeline.Fit(trainData); - - // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); - - var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - - Console.WriteLine($"Accuracy: {metrics.Accuracy}"); // 0.84 - Console.WriteLine($"AUC: {metrics.Auc}"); // 0.88 - Console.WriteLine($"F1 Score: {metrics.F1Score}"); // 0.62 - - Console.WriteLine($"Negative Precision: {metrics.NegativePrecision}"); // 0.88 - Console.WriteLine($"Negative Recall: {metrics.NegativeRecall}"); // 0.91 - Console.WriteLine($"Positive Precision: {metrics.PositivePrecision}"); // 0.67 - Console.WriteLine($"Positive Recall: {metrics.PositiveRecall}"); // 0.58 - } - } -} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs new file mode 100644 index 0000000000..2555ca9a4a --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -0,0 +1,62 @@ +using System; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Categorical; + +namespace Microsoft.ML.Samples.Dynamic.Trainers +{ + public class LightGbmBinaryClassification + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline + var mlContext = new MLContext(); + + // Download the dataset and load it as IDataView + var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); + + // Create the Estimator + var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] + { + new OneHotEncodingEstimator.ColumnInfo("marital-status"), + new OneHotEncodingEstimator.ColumnInfo("occupation"), + new OneHotEncodingEstimator.ColumnInfo("relationship"), + new OneHotEncodingEstimator.ColumnInfo("ethnicity"), + new OneHotEncodingEstimator.ColumnInfo("sex"), + new OneHotEncodingEstimator.ColumnInfo("native-country"), + }) + .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) + .Append(mlContext.Transforms.Concatenate("Features", + "age", + "education-num", + "marital-status", + "relationship", + "ethnicity", + "sex", + "hours-per-week", + "native-country")) + .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features")); + + // Fit this Pipeline to the Training Data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); + + // Output: + // Accuracy: 0.84 + // AUC: 0.88 + // F1 Score: 0.62 + // Negative Precision: 0.88 + // Negative Recall: 0.91 + // Positive Precision: 0.68 + // Positive Recall: 0.59 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs similarity index 72% rename from docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index 36d2467a71..049508cff6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -3,11 +3,11 @@ using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.LightGBM +namespace Microsoft.ML.Samples.Dynamic.Trainers { class LightGbmMulticlassClassification { - public static void LightGbmMulticlassClassificationExample() + public static void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, // as a catalog of available operations and as the source of randomness. @@ -19,6 +19,13 @@ public static void LightGbmMulticlassClassificationExample() // Convert native C# class to IDataView, a consumble format to ML.NET functions. var dataView = mlContext.Data.ReadFromEnumerable(examples); + //////////////////// Data Preview //////////////////// + // Label Features + // AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045 + // BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918 + // CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099 + // DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455 + // Create a pipeline. // - Convert the string labels into key types. // - Apply LightGbm multiclass trainer @@ -37,24 +44,22 @@ public static void LightGbmMulticlassClassificationExample() // Do prediction on the test set. var dataWithPredictions = model.Transform(testingData); - // Evaluate the trained model is the test set. + // Evaluate the trained model using the test set. var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); - // Check if metrics are resonable. - Console.WriteLine("Macro accuracy: {0}, Micro accuracy: {1}.", 0.863482146891263, 0.86309523809523814); + // Check if metrics are reasonable. + Console.WriteLine("Macro accuracy: {0}, Micro accuracy: {1}.", metrics.AccuracyMacro, metrics.AccuracyMicro); + // Console output: + // Macro accuracy: 0.863482146891263, Micro accuracy: 0.863095238095238. - // Convert prediction in ML.NET format to native C# class. + // IDataView with predictions, to an IEnumerable var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index - // (e.g., 1) to its actual label (e.g., "AA"). The call to "AsDynamic" converts our statically-typed pipeline into - // a dynamically-typed one only for extracting metadata. In the future, metadata in statically-typed pipeline should - // be accessible without dynamically-typed things. - var schema = dataWithPredictions.Schema; - - // Retrieve the mapping from labels to label indexes. + // (e.g., 1) to its actual label (e.g., "AA"). + // The metadata can be used to get all the unique labels used during training. var labelBuffer = new VBuffer>(); - schema[nameof(DatasetUtils.MulticlassClassificationExample.PredictedLabelIndex)].Metadata.GetValue("KeyValues", ref labelBuffer); + dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer); // nativeLabels is { "AA" , "BB", "CC", "DD" } var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex. @@ -67,14 +72,13 @@ public static void LightGbmMulticlassClassificationExample() nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1], nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]); - var expectedProbabilities = new float[] { 0.922597349f, 0.07508608f, 0.00221699756f, 9.95488E-05f }; // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. // Console output: // The probability of being class "AA" is 0.922597349. // The probability of being class "BB" is 0.07508608. // The probability of being class "CC" is 0.00221699756. // The probability of being class "DD" is 9.95488E-05. - for (int i = 0; i < labelBuffer.Length; ++i) + for (int i = 0; i < nativeLabels.Length; ++i) Console.WriteLine("The probability of being class {0} is {1}.", nativeLabels[i], nativePrediction.Scores[i]); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs similarity index 92% rename from docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index 669c0a40fb..c2d1194098 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/LightGBM/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -1,11 +1,11 @@ using System; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.LightGBM +namespace Microsoft.ML.Samples.Dynamic.Trainers { class LightGbmRegression { - public static void LightGbmRegressionExample() + public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning // this will create a housing.txt file in the filsystem this code will run @@ -49,13 +49,12 @@ public static void LightGbmRegressionExample() // Fit this pipeline to the training data var model = pipeline.Fit(trainData); - // Check the weights that the model learned + // Get the feature importance based on the information gain used during training. VBuffer weights = default; model.Model.GetFeatureWeights(ref weights); - var weightsValues = weights.GetValues(); Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 - Console.WriteLine($"weight 1 - {weightsValues[5]}"); // RoomsPerDwelling (weight 1) = 1 + Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 // Evaluate how the model is doing on the test data var dataWithPredictions = model.Transform(testData); diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs index 294bac2628..1ff65322d8 100644 --- a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs +++ b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs @@ -38,7 +38,7 @@ public static class LightGbmStaticExtensions /// /// /// /// public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers catalog, @@ -122,7 +122,7 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c /// /// /// /// public static (Scalar score, Scalar probability, Scalar predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, @@ -296,7 +296,7 @@ public static Scalar LightGbm(this RankingCatalog.RankingTrainers c /// /// /// /// /// diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index da8905b0ca..eb3ead90fd 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -55,7 +55,7 @@ protected BoosterParameter(TArgs args) /// /// Update the parameters by specific Booster, will update parameters into "res" directly. /// - public virtual void UpdateParameters(Dictionary res) + internal virtual void UpdateParameters(Dictionary res) { FieldInfo[] fields = Args.GetType().GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance); foreach (var field in fields) @@ -68,6 +68,8 @@ public virtual void UpdateParameters(Dictionary res) res[GetArgName(field.Name)] = field.GetValue(Args); } } + + void IBoosterParameter.UpdateParameters(Dictionary res) => UpdateParameters(res); } private static string GetArgName(string name) @@ -93,14 +95,13 @@ private static string GetArgName(string name) [BestFriend] internal static class Defaults { - [BestFriend] - internal const int NumBoostRound = 100; + public const int NumBoostRound = 100; } public sealed class TreeBooster : BoosterParameter { public const string Name = "gbdt"; - public const string FriendlyName = "Tree Booster"; + internal const string FriendlyName = "Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")] public class Arguments : ISupportBoosterParameterFactory @@ -165,7 +166,9 @@ public class Arguments : ISupportBoosterParameterFactory " A typical value to consider: sum(negative cases) / sum(positive cases).")] public double ScalePosWeight = 1; - public virtual IBoosterParameter CreateComponent(IHostEnvironment env) => new TreeBooster(this); + internal virtual IBoosterParameter CreateComponent(IHostEnvironment env) => new TreeBooster(this); + + IBoosterParameter IComponentFactory.CreateComponent(IHostEnvironment env) => CreateComponent(env); } public TreeBooster(Arguments args) @@ -178,7 +181,7 @@ public TreeBooster(Arguments args) Contracts.CheckUserArg(Args.ScalePosWeight > 0 && Args.ScalePosWeight <= 1, nameof(Args.ScalePosWeight), "must be in (0,1]."); } - public override void UpdateParameters(Dictionary res) + internal override void UpdateParameters(Dictionary res) { base.UpdateParameters(res); res["boosting_type"] = Name; @@ -188,7 +191,7 @@ public override void UpdateParameters(Dictionary res) public class DartBooster : BoosterParameter { public const string Name = "dart"; - public const string FriendlyName = "Tree Dropout Tree Booster"; + internal const string FriendlyName = "Tree Dropout Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866")] public class Arguments : TreeBooster.Arguments @@ -211,7 +214,7 @@ public class Arguments : TreeBooster.Arguments [Argument(ArgumentType.AtMostOnce, HelpText = "True will enable uniform drop.")] public bool UniformDrop = false; - public override IBoosterParameter CreateComponent(IHostEnvironment env) => new DartBooster(this); + internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new DartBooster(this); } public DartBooster(Arguments args) @@ -222,7 +225,7 @@ public DartBooster(Arguments args) Contracts.CheckUserArg(Args.SkipDrop >= 0 && Args.SkipDrop < 1, nameof(Args.SkipDrop), "must be in [0,1)."); } - public override void UpdateParameters(Dictionary res) + internal override void UpdateParameters(Dictionary res) { base.UpdateParameters(res); res["boosting_type"] = Name; @@ -232,7 +235,7 @@ public override void UpdateParameters(Dictionary res) public class GossBooster : BoosterParameter { public const string Name = "goss"; - public const string FriendlyName = "Gradient-based One-Size Sampling"; + internal const string FriendlyName = "Gradient-based One-Size Sampling"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")] public class Arguments : TreeBooster.Arguments @@ -248,7 +251,7 @@ public class Arguments : TreeBooster.Arguments [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double OtherRate = 0.1; - public override IBoosterParameter CreateComponent(IHostEnvironment env) => new GossBooster(this); + internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new GossBooster(this); } public GossBooster(Arguments args) @@ -259,7 +262,7 @@ public GossBooster(Arguments args) Contracts.Check(Args.TopRate + Args.OtherRate <= 1, "Sum of topRate and otherRate cannot be larger than 1."); } - public override void UpdateParameters(Dictionary res) + internal override void UpdateParameters(Dictionary res) { base.UpdateParameters(res); res["boosting_type"] = Name; @@ -373,7 +376,7 @@ public enum EvalMetricType public double CatL2 = 10; [Argument(ArgumentType.Multiple, HelpText = "Parallel LightGBM Learning Algorithm", ShortName = "parag")] - public ISupportParallel ParallelTrainer = new SingleTrainerFactory(); + internal ISupportParallel ParallelTrainer = new SingleTrainerFactory(); internal Dictionary ToDictionary(IHost host) { diff --git a/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs b/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs index 522632a619..9948d1bb06 100644 --- a/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs +++ b/src/Microsoft.ML.LightGBM/Parallel/IParallel.cs @@ -22,7 +22,7 @@ namespace Microsoft.ML.LightGBM /// /// Definition of ReduceScatter funtion /// - public delegate void ReduceScatterFunction([MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 1)]byte[] input, int inputSize, int typeSize, + internal delegate void ReduceScatterFunction([MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 1)]byte[] input, int inputSize, int typeSize, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 5)]int[] blockStart, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 5)]int[] blockLen, int numBlock, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 7)]byte[] output, int outputSize, IntPtr reducer); @@ -30,11 +30,11 @@ public delegate void ReduceScatterFunction([MarshalAs(UnmanagedType.LPArray, Siz /// /// Definition of Allgather funtion /// - public delegate void AllgatherFunction([MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 1)]byte[] input, int inputSize, + internal delegate void AllgatherFunction([MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 1)]byte[] input, int inputSize, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 4)]int[] blockStart, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 4)]int[] blockLen, int numBlock, [MarshalAs(UnmanagedType.LPArray, SizeParamIndex = 6)]byte[] output, int outputSize); - public interface IParallel + internal interface IParallel { /// /// Type of parallel @@ -68,7 +68,7 @@ public interface IParallel } [TlcModule.ComponentKind("ParallelLightGBM")] - public interface ISupportParallel : IComponentFactory + internal interface ISupportParallel : IComponentFactory { } } diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs new file mode 100644 index 0000000000..814a251d6a --- /dev/null +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.Data; + +namespace Microsoft.ML.SamplesUtils +{ + public static class ConsoleUtils + { + public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + { + Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); + Console.WriteLine($"AUC: {metrics.Auc:F2}"); + Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); + Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); + Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); + } + } +} diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 17ce2e3ab7..19b95dc6cc 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -86,6 +86,38 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + public static IDataView LoadAdultDataset(MLContext mlContext) + { + // Download the file + string dataFile = DownloadAdultDataset(); + + // Define the columns to read + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("age", DataKind.R4, 0), + new TextLoader.Column("workclass", DataKind.TX, 1), + new TextLoader.Column("fnlwgt", DataKind.R4, 2), + new TextLoader.Column("education", DataKind.TX, 3), + new TextLoader.Column("education-num", DataKind.R4, 4), + new TextLoader.Column("marital-status", DataKind.TX, 5), + new TextLoader.Column("occupation", DataKind.TX, 6), + new TextLoader.Column("relationship", DataKind.TX, 7), + new TextLoader.Column("ethnicity", DataKind.TX, 8), + new TextLoader.Column("sex", DataKind.TX, 9), + new TextLoader.Column("capital-gain", DataKind.R4, 10), + new TextLoader.Column("capital-loss", DataKind.R4, 11), + new TextLoader.Column("hours-per-week", DataKind.R4, 12), + new TextLoader.Column("native-country", DataKind.R4, 13), + new TextLoader.Column("IsOver50K", DataKind.BL, 14), + }, + separatorChar: ',', + hasHeader: true + ); + + return reader.Read(dataFile); + } + /// /// Downloads the breast cancer dataset from the ML.NET repo. /// From 01f4188a41191200e491c5db679ba5bbfe271b10 Mon Sep 17 00:00:00 2001 From: zeahmed Date: Mon, 11 Feb 2019 17:07:58 -0800 Subject: [PATCH 03/14] Addressed reviewers' comments. --- .../LightGBMBinaryClassification.cs | 2 +- .../Trainers/Regression/LightGBMRegression.cs | 12 +++++++----- src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 11 ++++++++++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index 2555ca9a4a..5ac8aeede0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -47,7 +47,7 @@ public static void Example() var dataWithPredictions = model.Transform(testData); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); - SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output: // Accuracy: 0.84 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index c2d1194098..715d5d1d82 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -59,12 +59,14 @@ public static void Example() // Evaluate how the model is doing on the test data var dataWithPredictions = model.Transform(testData); var metrics = mlContext.Regression.Evaluate(dataWithPredictions); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); - Console.WriteLine($"L1 - {metrics.L1}"); // 4.9669731 - Console.WriteLine($"L2 - {metrics.L2}"); // 51.37296 - Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 51.37296 - Console.WriteLine($"RMS - {metrics.Rms}"); // 7.167493 - Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.079478 + // Output + // L1: 4.97 + // L2: 51.37 + // LossFunction: 51.37 + // RMS: 7.17 + // RSquared: 0.08 } } } diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs index 814a251d6a..137b54a2fe 100644 --- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.SamplesUtils { public static class ConsoleUtils { - public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + public static void PrintMetrics(BinaryClassificationMetrics metrics) { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.Auc:F2}"); @@ -17,5 +17,14 @@ public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); } + + public static void PrintMetrics(RegressionMetrics metrics) + { + Console.WriteLine($"L1: {metrics.L1:F2}"); + Console.WriteLine($"L2: {metrics.L2:F2}"); + Console.WriteLine($"LossFunction: {metrics.LossFn:F2}"); + Console.WriteLine($"RMS: {metrics.Rms:F2}"); + Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + } } } From 85d53d1f84d33ba6cfffb560d35ddf9028dc72ee Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 12 Feb 2019 13:08:58 -0800 Subject: [PATCH 04/14] Addressed reviewers' comments and added more samples. --- .../LightGBMBinaryClassification.cs | 6 +- ...LightGBMBinaryClassificationWithOptions.cs | 72 ++++++++++++++ .../LightGBMMulticlassClassification.cs | 23 +++-- ...tGBMMulticlassClassificationWithOptions.cs | 95 +++++++++++++++++++ .../Trainers/Regression/LightGBMRegression.cs | 2 +- .../LightGBMRegressionWithOptions.cs | 85 +++++++++++++++++ .../Static/LightGBMRegression.cs | 4 +- .../LightGbmArguments.cs | 6 +- src/Microsoft.ML.LightGBM/LightGbmCatalog.cs | 27 +++++- .../LightGbmMulticlassTrainer.cs | 2 +- 10 files changed, 296 insertions(+), 26 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index 5ac8aeede0..9f9db8a705 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -1,8 +1,6 @@ -using System; -using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Categorical; +using Microsoft.ML.Transforms.Categorical; -namespace Microsoft.ML.Samples.Dynamic.Trainers +namespace Microsoft.ML.Samples.Dynamic { public class LightGbmBinaryClassification { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs new file mode 100644 index 0000000000..c6ab7042bc --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -0,0 +1,72 @@ +using Microsoft.ML.LightGBM; +using Microsoft.ML.Transforms.Categorical; +using static Microsoft.ML.LightGBM.Options; + +namespace Microsoft.ML.Samples.Dynamic +{ + class LightGbmBinaryClassificationWithOptions + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline + var mlContext = new MLContext(); + + // Download the dataset and load it as IDataView + var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); + + // Create the pipeline with LightGbm Estimator using advanced options + var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] + { + new OneHotEncodingEstimator.ColumnInfo("marital-status"), + new OneHotEncodingEstimator.ColumnInfo("occupation"), + new OneHotEncodingEstimator.ColumnInfo("relationship"), + new OneHotEncodingEstimator.ColumnInfo("ethnicity"), + new OneHotEncodingEstimator.ColumnInfo("sex"), + new OneHotEncodingEstimator.ColumnInfo("native-country"), + }) + .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) + .Append(mlContext.Transforms.Concatenate("Features", + "age", + "education-num", + "marital-status", + "relationship", + "ethnicity", + "sex", + "hours-per-week", + "native-country")) + .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.BinaryClassification.Trainers.LightGbm( + new Options + { + LabelColumn = "IsOver50K", + FeatureColumn = "Features", + Booster = new GossBooster.Arguments + { + TopRate = 0.3, + OtherRate = 0.2 + } + })); + + // Fit this Pipeline to the Training Data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output: + // Accuracy: 0.84 + // AUC: 0.88 + // F1 Score: 0.62 + // Negative Precision: 0.88 + // Negative Recall: 0.92 + // Positive Precision: 0.67 + // Positive Recall: 0.58 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index 049508cff6..f7c816ca59 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -3,7 +3,7 @@ using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers +namespace Microsoft.ML.Samples.Dynamic { class LightGbmMulticlassClassification { @@ -48,9 +48,9 @@ public static void Example() var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); // Check if metrics are reasonable. - Console.WriteLine("Macro accuracy: {0}, Micro accuracy: {1}.", metrics.AccuracyMacro, metrics.AccuracyMicro); + Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); // Console output: - // Macro accuracy: 0.863482146891263, Micro accuracy: 0.863095238095238. + // Macro accuracy: 0.8635, Micro accuracy: 0.8631. // IDataView with predictions, to an IEnumerable var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); @@ -67,19 +67,18 @@ public static void Example() // Show prediction result for the 3rd example. var nativePrediction = nativePredictions[2]; // Console output: - // Our predicted label to this example is "AA" with probability 0.922597349. - Console.WriteLine("Our predicted label to this example is {0} with probability {1}", - nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1], - nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]); + // Our predicted label to this example is "AA" with probability 0.9226. + Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + + $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. // Console output: - // The probability of being class "AA" is 0.922597349. - // The probability of being class "BB" is 0.07508608. - // The probability of being class "CC" is 0.00221699756. - // The probability of being class "DD" is 9.95488E-05. + // The probability of being class "AA" is 0.9226. + // The probability of being class "BB" is 0.0751. + // The probability of being class "CC" is 0.0022. + // The probability of being class "DD" is 0.0001. for (int i = 0; i < nativeLabels.Length; ++i) - Console.WriteLine("The probability of being class {0} is {1}.", nativeLabels[i], nativePrediction.Scores[i]); + Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs new file mode 100644 index 0000000000..ad0670d22a --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -0,0 +1,95 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.LightGBM; +using Microsoft.ML.SamplesUtils; +using static Microsoft.ML.LightGBM.Options; + +namespace Microsoft.ML.Samples.Dynamic +{ + class LightGbmMulticlassClassificationWithOptions + { + public static void Example() + { + // Create a general context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(seed: 0); + + // Create in-memory examples as C# native class. + var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); + + // Convert native C# class to IDataView, a consumble format to ML.NET functions. + var dataView = mlContext.Data.ReadFromEnumerable(examples); + + //////////////////// Data Preview //////////////////// + // Label Features + // AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045 + // BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918 + // CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099 + // DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455 + + // Create a pipeline. + // - Convert the string labels into key types. + // - Apply LightGbm multiclass trainer with advanced options + var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") + .Append(mlContext.MulticlassClassification.Trainers.LightGbm(new Options + { + LabelColumn = "LabelIndex", + FeatureColumn = "Features", + Booster = new DartBooster.Arguments + { + DropRate = 0.15, + XgboostDartMode = false + } + })) + .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel")) + .Append(mlContext.Transforms.CopyColumns("Scores", "Score")); + + // Split the static-typed data into training and test sets. Only training set is used in fitting + // the created pipeline. Metrics are computed on the test. + var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Do prediction on the test set. + var dataWithPredictions = model.Transform(testingData); + + // Evaluate the trained model using the test set. + var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); + + // Check if metrics are reasonable. + Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); + // Console output: + // Macro accuracy: 0.8562, Micro accuracy: 0.8552. + + // IDataView with predictions, to an IEnumerable + var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); + + // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index + // (e.g., 1) to its actual label (e.g., "AA"). + // The metadata can be used to get all the unique labels used during training. + var labelBuffer = new VBuffer>(); + dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer); + // nativeLabels is { "AA" , "BB", "CC", "DD" } + var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex. + + + // Show prediction result for the 3rd example. + var nativePrediction = nativePredictions[2]; + // Console output: + // Our predicted label to this example is AA with probability 0.9592. + Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + + $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); + + // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. + // Console output: + // The probability of being class AA is 0.9592. + // The probability of being class BB is 0.0392. + // The probability of being class CC is 0.0015. + // The probability of being class DD is 0.0002. + for (int i = 0; i < nativeLabels.Length; ++i) + Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index 715d5d1d82..f640934608 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -1,7 +1,7 @@ using System; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers +namespace Microsoft.ML.Samples.Dynamic { class LightGbmRegression { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs new file mode 100644 index 0000000000..b6a4eeb3ca --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -0,0 +1,85 @@ +using System; +using Microsoft.ML.Data; +using Microsoft.ML.LightGBM; +using static Microsoft.ML.LightGBM.Options; + +namespace Microsoft.ML.Samples.Dynamic +{ + class LightGbmRegressionWithOptions + { + public static void Example() + { + // Downloading a regression dataset from github.com/dotnet/machinelearning + // this will create a housing.txt file in the filsystem this code will run + // you can open the file to see the data. + string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Creating a data reader, based on the format of the data + // The data is tab separated with all numeric columns. + // The first column being the label and rest are numeric features + // Here only seven numeric columns are used as features + var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments + { + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("LabelColumn", DataKind.R4, 0), + new TextLoader.Column("FeaturesColumn", DataKind.R4, 1, 6) + } + }); + + //////////////////// Data Preview //////////////////// + // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s + // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 + // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 + // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 + + var (trainData, testData) = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); + + // Create a pipeline with LightGbm estimator with advanced options, + // here we only need LightGbm trainer as data is already processed + // in a form consumable by the trainer + var options = new Options + { + LabelColumn = "LabelColumn", + FeatureColumn = "FeaturesColumn", + NumLeaves = 4, + MinDataPerLeaf = 6, + LearningRate = 0.001, + Booster = new GossBooster.Arguments + { + TopRate = 0.3, + OtherRate = 0.2 + } + }; + var pipeline = mlContext.Regression.Trainers.LightGbm(options); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Get the feature importance based on the information gain used during training. + VBuffer weights = default; + model.Model.GetFeatureWeights(ref weights); + var weightsValues = weights.GetValues(); + Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 + Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.Regression.Evaluate(dataWithPredictions, "LabelColumn"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output + // L1: 4.97 + // L2: 51.37 + // LossFunction: 51.37 + // RMS: 7.17 + // RSquared: 0.08 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs index fffad9181b..edcfd39efd 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs @@ -11,8 +11,8 @@ public class LightGbmRegressionExample public static void LightGbmRegression() { // Downloading a regression dataset from github.com/dotnet/machinelearning - // this will create a housing.txt file in the filsystem this code will run - // you can open the file to see the data. + // this will create a housing.txt file in the filsystem. + // You can open the file to see the data. string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index eb3ead90fd..68ca5fe7f6 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -171,7 +171,7 @@ public class Arguments : ISupportBoosterParameterFactory IBoosterParameter IComponentFactory.CreateComponent(IHostEnvironment env) => CreateComponent(env); } - public TreeBooster(Arguments args) + internal TreeBooster(Arguments args) : base(args) { Contracts.CheckUserArg(Args.MinSplitGain >= 0, nameof(Args.MinSplitGain), "must be >= 0."); @@ -217,7 +217,7 @@ public class Arguments : TreeBooster.Arguments internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new DartBooster(this); } - public DartBooster(Arguments args) + internal DartBooster(Arguments args) : base(args) { Contracts.CheckUserArg(Args.DropRate > 0 && Args.DropRate < 1, nameof(Args.DropRate), "must be in (0,1)."); @@ -254,7 +254,7 @@ public class Arguments : TreeBooster.Arguments internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new GossBooster(this); } - public GossBooster(Arguments args) + internal GossBooster(Arguments args) : base(args) { Contracts.CheckUserArg(Args.TopRate > 0 && Args.TopRate < 1, nameof(Args.TopRate), "must be in (0,1)."); diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index 5153f0897d..08a968ec5d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -27,7 +27,7 @@ public static class LightGbmExtensions /// /// /// /// /// @@ -50,6 +50,13 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio /// /// The . /// Advanced options to the algorithm. + /// + /// + /// + /// + /// public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.RegressionTrainers catalog, Options options) { @@ -72,7 +79,7 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio /// /// /// /// /// @@ -95,6 +102,13 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.Bi /// /// The . /// Advanced options to the algorithm. + /// + /// + /// + /// + /// public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, Options options) { @@ -157,7 +171,7 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer /// /// /// /// /// @@ -180,6 +194,13 @@ public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationCa /// /// The . /// Advanced options to the algorithm. + /// + /// + /// + /// + /// public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, Options options) { diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 3b2a1b56af..9e54babf6d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -36,7 +36,7 @@ public sealed class LightGbmMulticlassTrainer : LightGbmTrainerBase PredictionKind.MultiClassClassification; internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options) - : base(env, LoadNameValue, options, TrainerUtils.MakeBoolScalarLabel(options.LabelColumn)) + : base(env, LoadNameValue, options, TrainerUtils.MakeU4ScalarColumn(options.LabelColumn)) { _numClass = -1; } From 3f91b542fe0370f3879c00a0594a9222914f1d2d Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 12 Feb 2019 13:15:12 -0800 Subject: [PATCH 05/14] Undo changes in LightGbmStaticExtension.cs --- .../LightGbmStaticExtensions.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs index 1ff65322d8..294bac2628 100644 --- a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs +++ b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs @@ -38,7 +38,7 @@ public static class LightGbmStaticExtensions /// /// /// /// public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers catalog, @@ -122,7 +122,7 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c /// /// /// /// public static (Scalar score, Scalar probability, Scalar predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, @@ -296,7 +296,7 @@ public static Scalar LightGbm(this RankingCatalog.RankingTrainers c /// /// /// /// /// From 924547090290cf94f3dcea01b285ff7f2380da47 Mon Sep 17 00:00:00 2001 From: zeahmed Date: Tue, 12 Feb 2019 21:29:48 -0800 Subject: [PATCH 06/14] Addressed reviewers' comments. --- .../BinaryClassification/LightGBMBinaryClassification.cs | 3 +++ .../LightGBMBinaryClassificationWithOptions.cs | 3 +++ .../LightGBMMulticlassClassification.cs | 3 +++ .../LightGBMMulticlassClassificationWithOptions.cs | 3 +++ .../Dynamic/Trainers/Regression/LightGBMRegression.cs | 3 +++ .../Trainers/Regression/LightGBMRegressionWithOptions.cs | 3 +++ 6 files changed, 18 insertions(+) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index 9f9db8a705..5d62b0ed39 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -4,6 +4,9 @@ namespace Microsoft.ML.Samples.Dynamic { public class LightGbmBinaryClassification { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Creating the ML.Net IHostEnvironment object, needed for the pipeline diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs index c6ab7042bc..cf01c42d90 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -6,6 +6,9 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmBinaryClassificationWithOptions { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Creating the ML.Net IHostEnvironment object, needed for the pipeline diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index f7c816ca59..d6f9c61a49 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -7,6 +7,9 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmMulticlassClassification { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs index ad0670d22a..6318abf8ff 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -9,6 +9,9 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmMulticlassClassificationWithOptions { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index f640934608..a80e193c3d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -5,6 +5,9 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmRegression { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs index b6a4eeb3ca..8590b181ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -7,6 +7,9 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmRegressionWithOptions { + /// + /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning From 49509c1c88f7c9739b4b169851832c3421ccbfb4 Mon Sep 17 00:00:00 2001 From: zeahmed Date: Tue, 12 Feb 2019 22:03:33 -0800 Subject: [PATCH 07/14] Merged with the base and resolved merge conflicts. --- .../BinaryClassification/LightGBMBinaryClassification.cs | 6 +++--- .../LightGBMBinaryClassificationWithOptions.cs | 6 +++--- .../LightGBMMulticlassClassification.cs | 6 +++--- .../LightGBMMulticlassClassificationWithOptions.cs | 6 +++--- .../Dynamic/Trainers/Regression/LightGBMRegression.cs | 6 +++--- .../Trainers/Regression/LightGBMRegressionWithOptions.cs | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index 5d62b0ed39..d4a4979664 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -16,7 +16,7 @@ public static void Example() var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); + var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); // Create the Estimator var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] @@ -42,10 +42,10 @@ public static void Example() .Append(mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features")); // Fit this Pipeline to the Training Data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(split.TrainSet); // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs index cf01c42d90..fdbf17a1a2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -18,7 +18,7 @@ public static void Example() var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); + var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); // Create the pipeline with LightGbm Estimator using advanced options var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] @@ -54,10 +54,10 @@ public static void Example() })); // Fit this Pipeline to the Training Data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(split.TrainSet); // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index d6f9c61a49..76b5b105ee 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -39,13 +39,13 @@ public static void Example() // Split the static-typed data into training and test sets. Only training set is used in fitting // the created pipeline. Metrics are computed on the test. - var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); + var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); // Train the model. - var model = pipeline.Fit(trainingData); + var model = pipeline.Fit(split.TrainSet); // Do prediction on the test set. - var dataWithPredictions = model.Transform(testingData); + var dataWithPredictions = model.Transform(split.TestSet); // Evaluate the trained model using the test set. var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs index 6318abf8ff..ad310e7ced 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -50,13 +50,13 @@ public static void Example() // Split the static-typed data into training and test sets. Only training set is used in fitting // the created pipeline. Metrics are computed on the test. - var (trainingData, testingData) = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); + var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); // Train the model. - var model = pipeline.Fit(trainingData); + var model = pipeline.Fit(split.TrainSet); // Do prediction on the test set. - var dataWithPredictions = model.Transform(testingData); + var dataWithPredictions = model.Transform(split.TestSet); // Evaluate the trained model using the test set. var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index a80e193c3d..143b05e067 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -40,7 +40,7 @@ public static void Example() // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 - var (trainData, testData) = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); + var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); // Create the estimator, here we only need LightGbm trainer // as data is already processed in a form consumable by the trainer @@ -50,7 +50,7 @@ public static void Example() learningRate: 0.001); // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(split.TrainSet); // Get the feature importance based on the information gain used during training. VBuffer weights = default; @@ -60,7 +60,7 @@ public static void Example() Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs index 8590b181ba..cb04638922 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -42,7 +42,7 @@ public static void Example() // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 - var (trainData, testData) = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); + var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); // Create a pipeline with LightGbm estimator with advanced options, // here we only need LightGbm trainer as data is already processed @@ -63,7 +63,7 @@ public static void Example() var pipeline = mlContext.Regression.Trainers.LightGbm(options); // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(split.TrainSet); // Get the feature importance based on the information gain used during training. VBuffer weights = default; @@ -73,7 +73,7 @@ public static void Example() Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions, "LabelColumn"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); From 3e27a883c847e73eb16054f88c1192c49a51f0c1 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Wed, 13 Feb 2019 10:36:07 -0800 Subject: [PATCH 08/14] Addressed reviewers' comments. --- .../BinaryClassification/LightGBMBinaryClassification.cs | 2 +- .../LightGBMBinaryClassificationWithOptions.cs | 2 +- .../LightGBMMulticlassClassification.cs | 2 +- .../LightGBMMulticlassClassificationWithOptions.cs | 2 +- .../Dynamic/Trainers/Regression/LightGBMRegression.cs | 2 +- .../Trainers/Regression/LightGBMRegressionWithOptions.cs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index d4a4979664..f8342aa7be 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic public class LightGbmBinaryClassification { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs index fdbf17a1a2..93097192a3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -7,7 +7,7 @@ namespace Microsoft.ML.Samples.Dynamic class LightGbmBinaryClassificationWithOptions { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index 76b5b105ee..6c846308e3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -8,7 +8,7 @@ namespace Microsoft.ML.Samples.Dynamic class LightGbmMulticlassClassification { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs index ad310e7ced..6f009ef528 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Samples.Dynamic class LightGbmMulticlassClassificationWithOptions { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index 143b05e067..72fa72babf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -6,7 +6,7 @@ namespace Microsoft.ML.Samples.Dynamic class LightGbmRegression { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs index cb04638922..6070909f6f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -8,7 +8,7 @@ namespace Microsoft.ML.Samples.Dynamic class LightGbmRegressionWithOptions { /// - /// This example require installation of addition nuget package Microsoft.ML.LightGBM + /// This example requires installation of additional nuget package Microsoft.ML.LightGBM /// public static void Example() { From 571575fb545c824e94926c5a190bfa712f15308d Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Wed, 13 Feb 2019 19:08:18 -0800 Subject: [PATCH 09/14] Addressed reviewers' comments. --- .../LightGBMBinaryClassification.cs | 56 ++++--------- ...LightGBMBinaryClassificationWithOptions.cs | 74 ++++++----------- .../LightGBMMulticlassClassification.cs | 18 ++-- ...tGBMMulticlassClassificationWithOptions.cs | 20 ++--- .../Trainers/Regression/LightGBMRegression.cs | 60 ++++++-------- .../LightGBMRegressionWithOptions.cs | 83 ++++++++----------- .../LightGbmArguments.cs | 12 +-- .../Microsoft.ML.SamplesUtils.csproj | 2 + .../SamplesDatasetUtils.cs | 23 ++++- 9 files changed, 149 insertions(+), 199 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs index f8342aa7be..edd4e31504 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs @@ -4,60 +4,38 @@ namespace Microsoft.ML.Samples.Dynamic { public class LightGbmBinaryClassification { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { - // Creating the ML.Net IHostEnvironment object, needed for the pipeline + // Creating the ML.Net IHostEnvironment object, needed for the pipeline. var mlContext = new MLContext(); - // Download the dataset and load it as IDataView - var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + // Download and featurize the dataset. + var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); - // Leave out 10% of data for testing + // Leave out 10% of data for testing. var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); - // Create the Estimator - var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] - { - new OneHotEncodingEstimator.ColumnInfo("marital-status"), - new OneHotEncodingEstimator.ColumnInfo("occupation"), - new OneHotEncodingEstimator.ColumnInfo("relationship"), - new OneHotEncodingEstimator.ColumnInfo("ethnicity"), - new OneHotEncodingEstimator.ColumnInfo("sex"), - new OneHotEncodingEstimator.ColumnInfo("native-country"), - }) - .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) - .Append(mlContext.Transforms.Concatenate("Features", - "age", - "education-num", - "marital-status", - "relationship", - "ethnicity", - "sex", - "hours-per-week", - "native-country")) - .Append(mlContext.Transforms.Normalize("Features")) - .Append(mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features")); - - // Fit this Pipeline to the Training Data + // Create the Estimator. + var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features"); + + // Fit this Pipeline to the Training Data. var model = pipeline.Fit(split.TrainSet); - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output: - // Accuracy: 0.84 - // AUC: 0.88 - // F1 Score: 0.62 - // Negative Precision: 0.88 - // Negative Recall: 0.91 - // Positive Precision: 0.68 - // Positive Recall: 0.59 + // Accuracy: 0.88 + // AUC: 0.93 + // F1 Score: 0.71 + // Negative Precision: 0.90 + // Negative Recall: 0.94 + // Positive Precision: 0.76 + // Positive Recall: 0.66 } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs index 93097192a3..5e0d314424 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -6,70 +6,48 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmBinaryClassificationWithOptions { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { // Creating the ML.Net IHostEnvironment object, needed for the pipeline var mlContext = new MLContext(); - // Download the dataset and load it as IDataView - var dataview = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + // Download and featurize the dataset. + var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); - // Leave out 10% of data for testing + // Leave out 10% of data for testing. var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); - // Create the pipeline with LightGbm Estimator using advanced options - var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(new OneHotEncodingEstimator.ColumnInfo[] - { - new OneHotEncodingEstimator.ColumnInfo("marital-status"), - new OneHotEncodingEstimator.ColumnInfo("occupation"), - new OneHotEncodingEstimator.ColumnInfo("relationship"), - new OneHotEncodingEstimator.ColumnInfo("ethnicity"), - new OneHotEncodingEstimator.ColumnInfo("sex"), - new OneHotEncodingEstimator.ColumnInfo("native-country"), - }) - .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("native-country", count: 10)) - .Append(mlContext.Transforms.Concatenate("Features", - "age", - "education-num", - "marital-status", - "relationship", - "ethnicity", - "sex", - "hours-per-week", - "native-country")) - .Append(mlContext.Transforms.Normalize("Features")) - .Append(mlContext.BinaryClassification.Trainers.LightGbm( - new Options - { - LabelColumn = "IsOver50K", - FeatureColumn = "Features", - Booster = new GossBooster.Arguments - { - TopRate = 0.3, - OtherRate = 0.2 - } - })); - - // Fit this Pipeline to the Training Data + // Create the pipeline with LightGbm Estimator using advanced options. + var pipeline = mlContext.BinaryClassification.Trainers.LightGbm( + new Options + { + LabelColumn = "IsOver50K", + FeatureColumn = "Features", + Booster = new GossBooster.Arguments + { + TopRate = 0.3, + OtherRate = 0.2 + } + }); + + // Fit this Pipeline to the Training Data. var model = pipeline.Fit(split.TrainSet); - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output: - // Accuracy: 0.84 - // AUC: 0.88 - // F1 Score: 0.62 - // Negative Precision: 0.88 - // Negative Recall: 0.92 - // Positive Precision: 0.67 - // Positive Recall: 0.58 + // Accuracy: 0.88 + // AUC: 0.93 + // F1 Score: 0.71 + // Negative Precision: 0.90 + // Negative Recall: 0.94 + // Positive Precision: 0.76 + // Positive Recall: 0.67 } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs index 6c846308e3..8731c6bc50 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs @@ -7,9 +7,7 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmMulticlassClassification { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, @@ -31,7 +29,7 @@ public static void Example() // Create a pipeline. // - Convert the string labels into key types. - // - Apply LightGbm multiclass trainer + // - Apply LightGbm multiclass trainer. var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") .Append(mlContext.MulticlassClassification.Trainers.LightGbm(labelColumn: "LabelIndex")) .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel")) @@ -53,9 +51,9 @@ public static void Example() // Check if metrics are reasonable. Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); // Console output: - // Macro accuracy: 0.8635, Micro accuracy: 0.8631. + // Macro accuracy: 0.8655, Micro accuracy: 0.8651. - // IDataView with predictions, to an IEnumerable + // IDataView with predictions, to an IEnumerable. var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index @@ -70,15 +68,15 @@ public static void Example() // Show prediction result for the 3rd example. var nativePrediction = nativePredictions[2]; // Console output: - // Our predicted label to this example is "AA" with probability 0.9226. + // Our predicted label to this example is "AA" with probability 0.9257. Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. // Console output: - // The probability of being class "AA" is 0.9226. - // The probability of being class "BB" is 0.0751. - // The probability of being class "CC" is 0.0022. + // The probability of being class "AA" is 0.9257. + // The probability of being class "BB" is 0.0739. + // The probability of being class "CC" is 0.0002. // The probability of being class "DD" is 0.0001. for (int i = 0; i < nativeLabels.Length; ++i) Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs index 6f009ef528..653615dca0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -9,9 +9,7 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmMulticlassClassificationWithOptions { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, @@ -33,7 +31,7 @@ public static void Example() // Create a pipeline. // - Convert the string labels into key types. - // - Apply LightGbm multiclass trainer with advanced options + // - Apply LightGbm multiclass trainer with advanced options. var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") .Append(mlContext.MulticlassClassification.Trainers.LightGbm(new Options { @@ -64,9 +62,9 @@ public static void Example() // Check if metrics are reasonable. Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); // Console output: - // Macro accuracy: 0.8562, Micro accuracy: 0.8552. + // Macro accuracy: 0.8619, Micro accuracy: 0.8611. - // IDataView with predictions, to an IEnumerable + // IDataView with predictions, to an IEnumerable. var nativePredictions = mlContext.CreateEnumerable(dataWithPredictions, false).ToList(); // Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index @@ -81,16 +79,16 @@ public static void Example() // Show prediction result for the 3rd example. var nativePrediction = nativePredictions[2]; // Console output: - // Our predicted label to this example is AA with probability 0.9592. + // Our predicted label to this example is AA with probability 0.8986. Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); // Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. // Console output: - // The probability of being class AA is 0.9592. - // The probability of being class BB is 0.0392. - // The probability of being class CC is 0.0015. - // The probability of being class DD is 0.0002. + // The probability of being class AA is 0.8986. + // The probability of being class BB is 0.0961. + // The probability of being class CC is 0.0050. + // The probability of being class DD is 0.0003. for (int i = 0; i < nativeLabels.Length; ++i) Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs index 72fa72babf..c4b6f9f68c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs @@ -1,67 +1,57 @@ using System; +using System.Linq; using Microsoft.ML.Data; namespace Microsoft.ML.Samples.Dynamic { class LightGbmRegression { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { - // Downloading a regression dataset from github.com/dotnet/machinelearning - // this will create a housing.txt file in the filsystem this code will run - // you can open the file to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var mlContext = new MLContext(); - // Creating a data reader, based on the format of the data - // The data is tab separated with all numeric columns. - // The first column being the label and rest are numeric features - // Here only seven numeric columns are used as features - var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments - { - Separators = new[] { '\t' }, - HasHeader = true, - Columns = new[] - { - new TextLoader.Column("Label", DataKind.R4, 0), - new TextLoader.Column("Features", DataKind.R4, 1, 6) - } - }); + // Download and load the housing dataset into an IDataView. + var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); //////////////////// Data Preview //////////////////// - // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s - // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 - // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 - // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 + /// Only 6 columns are displayed here. + // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s ... + // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 ... + // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ... + // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ... var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); - // Create the estimator, here we only need LightGbm trainer - // as data is already processed in a form consumable by the trainer - var pipeline = mlContext.Regression.Trainers.LightGbm( + // Create the estimator, here we only need LightGbm trainer + // as data is already processed in a form consumable by the trainer. + var labelName = "MedianHomeValue"; + var featureNames = dataView.Schema + .Select(column => column.Name) // Get the column names + .Where(name => name != labelName) // Drop the Label + .ToArray(); + var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) + .Append(mlContext.Regression.Trainers.LightGbm( + labelColumn: labelName, numLeaves: 4, minDataPerLeaf: 6, - learningRate: 0.001); + learningRate: 0.001)); - // Fit this pipeline to the training data + // Fit this pipeline to the training data. var model = pipeline.Fit(split.TrainSet); // Get the feature importance based on the information gain used during training. VBuffer weights = default; - model.Model.GetFeatureWeights(ref weights); - var weightsValues = weights.GetValues(); + model.LastTransformer.Model.GetFeatureWeights(ref weights); + var weightsValues = weights.DenseValues().ToArray(); Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); - var metrics = mlContext.Regression.Evaluate(dataWithPredictions); + var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs index 6070909f6f..7a97641251 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -1,4 +1,5 @@ using System; +using System.Linq; using Microsoft.ML.Data; using Microsoft.ML.LightGBM; using static Microsoft.ML.LightGBM.Options; @@ -7,74 +8,60 @@ namespace Microsoft.ML.Samples.Dynamic { class LightGbmRegressionWithOptions { - /// - /// This example requires installation of additional nuget package Microsoft.ML.LightGBM - /// + // This example requires installation of additional nuget package Microsoft.ML.LightGBM. public static void Example() { - // Downloading a regression dataset from github.com/dotnet/machinelearning - // this will create a housing.txt file in the filsystem this code will run - // you can open the file to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var mlContext = new MLContext(); - // Creating a data reader, based on the format of the data - // The data is tab separated with all numeric columns. - // The first column being the label and rest are numeric features - // Here only seven numeric columns are used as features - var dataView = mlContext.Data.ReadFromTextFile(dataFile, new TextLoader.Arguments - { - Separators = new[] { '\t' }, - HasHeader = true, - Columns = new[] - { - new TextLoader.Column("LabelColumn", DataKind.R4, 0), - new TextLoader.Column("FeaturesColumn", DataKind.R4, 1, 6) - } - }); + // Download and load the housing dataset into an IDataView. + var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); //////////////////// Data Preview //////////////////// - // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s - // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 - // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 - // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 + /// Only 6 columns are displayed here. + // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s ... + // 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 ... + // 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ... + // 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ... var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); - // Create a pipeline with LightGbm estimator with advanced options, - // here we only need LightGbm trainer as data is already processed - // in a form consumable by the trainer - var options = new Options - { - LabelColumn = "LabelColumn", - FeatureColumn = "FeaturesColumn", - NumLeaves = 4, - MinDataPerLeaf = 6, - LearningRate = 0.001, - Booster = new GossBooster.Arguments - { - TopRate = 0.3, - OtherRate = 0.2 - } - }; - var pipeline = mlContext.Regression.Trainers.LightGbm(options); + // Create a pipeline with LightGbm estimator with advanced options. + // Here we only need LightGbm trainer as data is already processed + // in a form consumable by the trainer. + var labelName = "MedianHomeValue"; + var featureNames = dataView.Schema + .Select(column => column.Name) // Get the column names + .Where(name => name != labelName) // Drop the Label + .ToArray(); + var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) + .Append(mlContext.Regression.Trainers.LightGbm(new Options + { + LabelColumn = labelName, + NumLeaves = 4, + MinDataPerLeaf = 6, + LearningRate = 0.001, + Booster = new GossBooster.Arguments + { + TopRate = 0.3, + OtherRate = 0.2 + } + })); - // Fit this pipeline to the training data + // Fit this pipeline to the training data. var model = pipeline.Fit(split.TrainSet); // Get the feature importance based on the information gain used during training. VBuffer weights = default; - model.Model.GetFeatureWeights(ref weights); - var weightsValues = weights.GetValues(); + model.LastTransformer.Model.GetFeatureWeights(ref weights); + var weightsValues = weights.DenseValues().ToArray(); Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); - var metrics = mlContext.Regression.Evaluate(dataWithPredictions, "LabelColumn"); + var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 926b96aaa2..aee672a5f1 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -27,11 +27,11 @@ namespace Microsoft.ML.LightGBM internal delegate void SignatureLightGBMBooster(); [TlcModule.ComponentKind("BoosterParameterFunction")] - public interface ISupportBoosterParameterFactory : IComponentFactory + internal interface ISupportBoosterParameterFactory : IComponentFactory { } - public interface IBoosterParameter + internal interface IBoosterParameter { void UpdateParameters(Dictionary res); } @@ -100,7 +100,7 @@ internal static class Defaults public sealed class TreeBooster : BoosterParameter { - public const string Name = "gbdt"; + internal const string Name = "gbdt"; internal const string FriendlyName = "Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")] @@ -190,7 +190,7 @@ internal override void UpdateParameters(Dictionary res) public class DartBooster : BoosterParameter { - public const string Name = "dart"; + internal const string Name = "dart"; internal const string FriendlyName = "Tree Dropout Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866")] @@ -234,7 +234,7 @@ internal override void UpdateParameters(Dictionary res) public class GossBooster : BoosterParameter { - public const string Name = "goss"; + internal const string Name = "goss"; internal const string FriendlyName = "Gradient-based One-Size Sampling"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")] @@ -311,7 +311,7 @@ public enum EvalMetricType public int MaxBin = 255; [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] - public ISupportBoosterParameterFactory Booster = new TreeBooster.Arguments(); + public TreeBooster.Arguments Booster = new TreeBooster.Arguments(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] public bool VerboseEval = false; diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index e4d6c5d504..0bdb047d42 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -6,7 +6,9 @@ + + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 19b95dc6cc..10a5d5fb68 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -86,7 +86,7 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); - public static IDataView LoadAdultDataset(MLContext mlContext) + public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) { // Download the file string dataFile = DownloadAdultDataset(); @@ -115,7 +115,26 @@ public static IDataView LoadAdultDataset(MLContext mlContext) hasHeader: true ); - return reader.Read(dataFile); + // Create data featurizing pipeline + var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") + // Convert categorical features to one-hot vectors + .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) + // Combine all features into one feature vector + .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", + "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", + "capital-gain", "capital-loss", "hours-per-week")) + // Min-max normalized all the features + .Append(mlContext.Transforms.Normalize("Features")); + + var data = reader.Read(dataFile); + var featurizedData = pipeline.Fit(data).Transform(data); + return featurizedData; } /// From fc60edd665b9c60dd937977eb2c8ed9a7c21d2f3 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 14 Feb 2019 14:18:07 -0800 Subject: [PATCH 10/14] Created two parameters for Booster. One for EntryPoint and one for cmdline. --- src/Microsoft.ML.LightGBM/LightGbmArguments.cs | 5 ++++- test/BaselineOutput/Common/EntryPoints/core_manifest.json | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index aee672a5f1..8feb3f6f0f 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -310,7 +310,10 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bucket bin for features.", ShortName = "mb")] public int MaxBin = 255; - [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] + [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] + internal ISupportBoosterParameterFactory BoosterFactory = new TreeBooster.Arguments(); + + [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly)] public TreeBooster.Arguments Booster = new TreeBooster.Arguments(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 5774a20a47..cb6bbfa48e 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -11258,7 +11258,7 @@ "Default": "Features" }, { - "Name": "Booster", + "Name": "BoosterFactory", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -11762,7 +11762,7 @@ "Default": "Features" }, { - "Name": "Booster", + "Name": "BoosterFactory", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -12266,7 +12266,7 @@ "Default": "Features" }, { - "Name": "Booster", + "Name": "BoosterFactory", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -12770,7 +12770,7 @@ "Default": "Features" }, { - "Name": "Booster", + "Name": "BoosterFactory", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" From 31ceabb667aad8fbf98cb1bb188586d8a0338c8d Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 14 Feb 2019 14:24:30 -0800 Subject: [PATCH 11/14] Making all the Booster classes sealed. --- src/Microsoft.ML.LightGBM/LightGbmArguments.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 8feb3f6f0f..05dfa7b7c7 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -188,7 +188,7 @@ internal override void UpdateParameters(Dictionary res) } } - public class DartBooster : BoosterParameter + public sealed class DartBooster : BoosterParameter { internal const string Name = "dart"; internal const string FriendlyName = "Tree Dropout Tree Booster"; @@ -232,7 +232,7 @@ internal override void UpdateParameters(Dictionary res) } } - public class GossBooster : BoosterParameter + public sealed class GossBooster : BoosterParameter { internal const string Name = "goss"; internal const string FriendlyName = "Gradient-based One-Size Sampling"; From 1b54da42d537450412a9276eee926d0f13f5081d Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 14 Feb 2019 14:32:10 -0800 Subject: [PATCH 12/14] Resolved conflicts in SamplesDatasetUtils.cs --- .../SamplesDatasetUtils.cs | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 363190bace..203bd6e6bd 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -87,57 +87,6 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); - public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) - { - // Download the file - string dataFile = DownloadAdultDataset(); - - // Define the columns to read - var reader = mlContext.Data.CreateTextLoader( - columns: new[] - { - new TextLoader.Column("age", DataKind.R4, 0), - new TextLoader.Column("workclass", DataKind.TX, 1), - new TextLoader.Column("fnlwgt", DataKind.R4, 2), - new TextLoader.Column("education", DataKind.TX, 3), - new TextLoader.Column("education-num", DataKind.R4, 4), - new TextLoader.Column("marital-status", DataKind.TX, 5), - new TextLoader.Column("occupation", DataKind.TX, 6), - new TextLoader.Column("relationship", DataKind.TX, 7), - new TextLoader.Column("ethnicity", DataKind.TX, 8), - new TextLoader.Column("sex", DataKind.TX, 9), - new TextLoader.Column("capital-gain", DataKind.R4, 10), - new TextLoader.Column("capital-loss", DataKind.R4, 11), - new TextLoader.Column("hours-per-week", DataKind.R4, 12), - new TextLoader.Column("native-country", DataKind.R4, 13), - new TextLoader.Column("IsOver50K", DataKind.BL, 14), - }, - separatorChar: ',', - hasHeader: true - ); - - // Create data featurizing pipeline - var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") - // Convert categorical features to one-hot vectors - .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) - // Combine all features into one feature vector - .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", - "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", - "capital-gain", "capital-loss", "hours-per-week")) - // Min-max normalized all the features - .Append(mlContext.Transforms.Normalize("Features")); - - var data = reader.Read(dataFile); - var featurizedData = pipeline.Fit(data).Transform(data); - return featurizedData; - } - /// /// Downloads the Adult UCI dataset and featurizes it to be suitable for classification tasks. /// From ed2ea83e94948bca41e83b0743b304efb42197a2 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 14 Feb 2019 15:46:09 -0800 Subject: [PATCH 13/14] Reverted changes related to making `ISupportBoosterParameterFactory` internal. --- src/Microsoft.ML.LightGBM/LightGbmArguments.cs | 11 ++++------- .../Common/EntryPoints/core_manifest.json | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 05dfa7b7c7..bc7e1ced24 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -27,11 +27,11 @@ namespace Microsoft.ML.LightGBM internal delegate void SignatureLightGBMBooster(); [TlcModule.ComponentKind("BoosterParameterFunction")] - internal interface ISupportBoosterParameterFactory : IComponentFactory + public interface ISupportBoosterParameterFactory : IComponentFactory { } - internal interface IBoosterParameter + public interface IBoosterParameter { void UpdateParameters(Dictionary res); } @@ -310,11 +310,8 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bucket bin for features.", ShortName = "mb")] public int MaxBin = 255; - [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] - internal ISupportBoosterParameterFactory BoosterFactory = new TreeBooster.Arguments(); - - [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly)] - public TreeBooster.Arguments Booster = new TreeBooster.Arguments(); + [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] + public ISupportBoosterParameterFactory Booster = new TreeBooster.Arguments(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] public bool VerboseEval = false; diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 968b323886..35dd5c085b 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -11248,7 +11248,7 @@ "Default": "Features" }, { - "Name": "BoosterFactory", + "Name": "Booster", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -11752,7 +11752,7 @@ "Default": "Features" }, { - "Name": "BoosterFactory", + "Name": "Booster", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -12256,7 +12256,7 @@ "Default": "Features" }, { - "Name": "BoosterFactory", + "Name": "Booster", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" @@ -12760,7 +12760,7 @@ "Default": "Features" }, { - "Name": "BoosterFactory", + "Name": "Booster", "Type": { "Kind": "Component", "ComponentKind": "BoosterParameterFunction" From d1d41097695bcf7081344944439c20c4a96a0c75 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 14 Feb 2019 16:02:06 -0800 Subject: [PATCH 14/14] Changed Arguments classes to Options. --- ...LightGBMBinaryClassificationWithOptions.cs | 2 +- ...tGBMMulticlassClassificationWithOptions.cs | 2 +- .../LightGBMRegressionWithOptions.cs | 2 +- .../LightGbmArguments.cs | 32 +++++++++---------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs index 5e0d314424..20924bc29f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs @@ -24,7 +24,7 @@ public static void Example() { LabelColumn = "IsOver50K", FeatureColumn = "Features", - Booster = new GossBooster.Arguments + Booster = new GossBooster.Options { TopRate = 0.3, OtherRate = 0.2 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs index 653615dca0..7d98c9318e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs @@ -37,7 +37,7 @@ public static void Example() { LabelColumn = "LabelIndex", FeatureColumn = "Features", - Booster = new DartBooster.Arguments + Booster = new DartBooster.Options { DropRate = 0.15, XgboostDartMode = false diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs index 7a97641251..3f73df053e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegressionWithOptions.cs @@ -42,7 +42,7 @@ public static void Example() NumLeaves = 4, MinDataPerLeaf = 6, LearningRate = 0.001, - Booster = new GossBooster.Arguments + Booster = new GossBooster.Options { TopRate = 0.3, OtherRate = 0.2 diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index bc7e1ced24..0085ad31aa 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -11,16 +11,16 @@ using Microsoft.ML.Internal.Internallearn; using Microsoft.ML.LightGBM; -[assembly: LoadableClass(typeof(Options.TreeBooster), typeof(Options.TreeBooster.Arguments), +[assembly: LoadableClass(typeof(Options.TreeBooster), typeof(Options.TreeBooster.Options), typeof(SignatureLightGBMBooster), Options.TreeBooster.FriendlyName, Options.TreeBooster.Name)] -[assembly: LoadableClass(typeof(Options.DartBooster), typeof(Options.DartBooster.Arguments), +[assembly: LoadableClass(typeof(Options.DartBooster), typeof(Options.DartBooster.Options), typeof(SignatureLightGBMBooster), Options.DartBooster.FriendlyName, Options.DartBooster.Name)] -[assembly: LoadableClass(typeof(Options.GossBooster), typeof(Options.GossBooster.Arguments), +[assembly: LoadableClass(typeof(Options.GossBooster), typeof(Options.GossBooster.Options), typeof(SignatureLightGBMBooster), Options.GossBooster.FriendlyName, Options.GossBooster.Name)] -[assembly: EntryPointModule(typeof(Options.TreeBooster.Arguments))] -[assembly: EntryPointModule(typeof(Options.DartBooster.Arguments))] -[assembly: EntryPointModule(typeof(Options.GossBooster.Arguments))] +[assembly: EntryPointModule(typeof(Options.TreeBooster.Options))] +[assembly: EntryPointModule(typeof(Options.DartBooster.Options))] +[assembly: EntryPointModule(typeof(Options.GossBooster.Options))] namespace Microsoft.ML.LightGBM { @@ -98,13 +98,13 @@ internal static class Defaults public const int NumBoostRound = 100; } - public sealed class TreeBooster : BoosterParameter + public sealed class TreeBooster : BoosterParameter { internal const string Name = "gbdt"; internal const string FriendlyName = "Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")] - public class Arguments : ISupportBoosterParameterFactory + public class Options : ISupportBoosterParameterFactory { [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when classes are not balanced.", ShortName = "us")] public bool UnbalancedSets = false; @@ -171,7 +171,7 @@ public class Arguments : ISupportBoosterParameterFactory IBoosterParameter IComponentFactory.CreateComponent(IHostEnvironment env) => CreateComponent(env); } - internal TreeBooster(Arguments args) + internal TreeBooster(Options args) : base(args) { Contracts.CheckUserArg(Args.MinSplitGain >= 0, nameof(Args.MinSplitGain), "must be >= 0."); @@ -188,13 +188,13 @@ internal override void UpdateParameters(Dictionary res) } } - public sealed class DartBooster : BoosterParameter + public sealed class DartBooster : BoosterParameter { internal const string Name = "dart"; internal const string FriendlyName = "Tree Dropout Tree Booster"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866")] - public class Arguments : TreeBooster.Arguments + public class Options : TreeBooster.Options { [Argument(ArgumentType.AtMostOnce, HelpText = "Drop ratio for trees. Range:(0,1).")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] @@ -217,7 +217,7 @@ public class Arguments : TreeBooster.Arguments internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new DartBooster(this); } - internal DartBooster(Arguments args) + internal DartBooster(Options args) : base(args) { Contracts.CheckUserArg(Args.DropRate > 0 && Args.DropRate < 1, nameof(Args.DropRate), "must be in (0,1)."); @@ -232,13 +232,13 @@ internal override void UpdateParameters(Dictionary res) } } - public sealed class GossBooster : BoosterParameter + public sealed class GossBooster : BoosterParameter { internal const string Name = "goss"; internal const string FriendlyName = "Gradient-based One-Size Sampling"; [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")] - public class Arguments : TreeBooster.Arguments + public class Options : TreeBooster.Options { [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for large gradient instances.")] @@ -254,7 +254,7 @@ public class Arguments : TreeBooster.Arguments internal override IBoosterParameter CreateComponent(IHostEnvironment env) => new GossBooster(this); } - internal GossBooster(Arguments args) + internal GossBooster(Options args) : base(args) { Contracts.CheckUserArg(Args.TopRate > 0 && Args.TopRate < 1, nameof(Args.TopRate), "must be in (0,1)."); @@ -311,7 +311,7 @@ public enum EvalMetricType public int MaxBin = 255; [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] - public ISupportBoosterParameterFactory Booster = new TreeBooster.Arguments(); + public ISupportBoosterParameterFactory Booster = new TreeBooster.Options(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] public bool VerboseEval = false;