-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Lockdown of Microsoft.ML.LightGBM public surface. #2476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
59d1a08
Lockdown of Microsoft.ML.LightGBM public surface.
zeahmed 97a2e59
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed a6c3357
Addressed reviewers' comments.
zeahmed 01f4188
Addressed reviewers' comments.
zeahmed 85d53d1
Addressed reviewers' comments and added more samples.
zeahmed 3f91b54
Undo changes in LightGbmStaticExtension.cs
zeahmed 9245470
Addressed reviewers' comments.
zeahmed 95de1a1
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed 49509c1
Merged with the base and resolved merge conflicts.
zeahmed 3e27a88
Addressed reviewers' comments.
zeahmed 6c8ac35
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed 571575f
Addressed reviewers' comments.
zeahmed 112636b
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed fc60edd
Created two parameters for Booster. One for EntryPoint and one for cm…
zeahmed 69e51bf
Merge branch 'LightGBM_refact' of https://github.com/zeahmed/machinel…
zeahmed 31ceabb
Making all the Booster classes sealed.
zeahmed 0a56dc8
Resolved merged conflicts.
zeahmed 1b54da4
Resolved conflicts in SamplesDatasetUtils.cs
zeahmed ed2ea83
Reverted changes related to making `ISupportBoosterParameterFactory` …
zeahmed d1d4109
Changed Arguments classes to Options.
zeahmed File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
41 changes: 41 additions & 0 deletions
41
...icrosoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassification.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
using Microsoft.ML.Transforms.Categorical; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample: trains a LightGBM binary classifier with default settings on the featurized Adult dataset | ||
// and prints the evaluation metrics computed on a held-out test split. | ||
public class LightGbmBinaryClassification | ||
{ | ||
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>. | ||
public static void Example() | ||
{ | ||
// Create the MLContext (the ML.NET environment object); the data split, trainer and evaluator below are all reached through it. | ||
var mlContext = new MLContext(); | ||
|
||
// Download and featurize the dataset. | ||
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); | ||
|
||
// Leave out 10% of data for testing. | ||
var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); | ||
|
||
// Create the LightGBM trainer (estimator). "IsOver50K" is the label column (it is also the label | ||
// passed to Evaluate below) and "Features" is the feature column. | ||
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features"); | ||
|
||
// Fit this pipeline to the training data. | ||
var model = pipeline.Fit(split.TrainSet); | ||
|
||
// Score the test data with the trained model. | ||
var dataWithPredictions = model.Transform(split.TestSet); | ||
|
||
// Evaluate how the model is doing on the test data and print the metrics. | ||
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
// Expected output: | ||
// Accuracy: 0.88 | ||
// AUC: 0.93 | ||
// F1 Score: 0.71 | ||
// Negative Precision: 0.90 | ||
// Negative Recall: 0.94 | ||
// Positive Precision: 0.76 | ||
// Positive Recall: 0.66 | ||
} | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
....Samples/Dynamic/Trainers/BinaryClassification/LightGBMBinaryClassificationWithOptions.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
using Microsoft.ML.LightGBM; | ||
using Microsoft.ML.Transforms.Categorical; | ||
using static Microsoft.ML.LightGBM.Options; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample: trains a LightGBM binary classifier configured through an Options object (using GossBooster.Options) | ||
// on the featurized Adult dataset and prints the evaluation metrics computed on a held-out test split. | ||
class LightGbmBinaryClassificationWithOptions | ||
{ | ||
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>. | ||
public static void Example() | ||
{ | ||
// Create the MLContext (the ML.NET environment object); the data split, trainer and evaluator below are all reached through it. | ||
var mlContext = new MLContext(); | ||
|
||
// Download and featurize the dataset. | ||
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); | ||
|
||
// Leave out 10% of data for testing. | ||
var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1); | ||
|
||
// Create the pipeline with LightGbm Estimator using advanced options: | ||
// - LabelColumn / FeatureColumn name the columns to train on. | ||
// - Booster selects the GossBooster and overrides its TopRate and OtherRate settings. | ||
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm( | ||
new Options | ||
{ | ||
LabelColumn = "IsOver50K", | ||
FeatureColumn = "Features", | ||
Booster = new GossBooster.Options | ||
{ | ||
TopRate = 0.3, | ||
OtherRate = 0.2 | ||
} | ||
}); | ||
|
||
// Fit this pipeline to the training data. | ||
var model = pipeline.Fit(split.TrainSet); | ||
|
||
// Score the test data with the trained model. | ||
var dataWithPredictions = model.Transform(split.TestSet); | ||
|
||
// Evaluate how the model is doing on the test data and print the metrics. | ||
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K"); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
// Expected output: | ||
// Accuracy: 0.88 | ||
// AUC: 0.93 | ||
// F1 Score: 0.71 | ||
// Negative Precision: 0.90 | ||
// Negative Recall: 0.94 | ||
// Positive Precision: 0.76 | ||
// Positive Recall: 0.67 | ||
} | ||
} | ||
} |
85 changes: 85 additions & 0 deletions
85
....ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassification.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
using System; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.SamplesUtils; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample: trains a LightGBM multiclass classifier with default settings on randomly generated examples, | ||
// prints macro/micro accuracy on a test split, and shows how to map predicted key indices back to labels. | ||
class LightGbmMulticlassClassification | ||
{ | ||
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>. | ||
public static void Example() | ||
{ | ||
// Create a general context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Create in-memory examples as C# native class. | ||
var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); | ||
|
||
// Convert native C# class to IDataView, a consumable format to ML.NET functions. | ||
var dataView = mlContext.Data.ReadFromEnumerable(examples); | ||
|
||
//////////////////// Data Preview //////////////////// | ||
// Label Features | ||
// AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045 | ||
// BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918 | ||
// CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099 | ||
// DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455 | ||
|
||
// Create a pipeline. | ||
// - Convert the string labels into key types. | ||
// - Apply LightGbm multiclass trainer. | ||
// - Map "PredictedLabel" to the key column "PredictedLabelIndex" and copy "Score" to "Scores"; | ||
// these are the names read back from the prediction objects below. | ||
var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") | ||
.Append(mlContext.MulticlassClassification.Trainers.LightGbm(labelColumn: "LabelIndex")) | ||
.Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel")) | ||
.Append(mlContext.Transforms.CopyColumns("Scores", "Score")); | ||
|
||
// Split the data into training and test sets (half each). Only the training set is used in fitting | ||
// the created pipeline. Metrics are computed on the test set. | ||
var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); | ||
|
||
// Train the model. | ||
var model = pipeline.Fit(split.TrainSet); | ||
|
||
// Do prediction on the test set. | ||
var dataWithPredictions = model.Transform(split.TestSet); | ||
|
||
// Evaluate the trained model using the test set. | ||
var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); | ||
|
||
// Check if metrics are reasonable. | ||
Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); | ||
// Console output: | ||
// Macro accuracy: 0.8655, Micro accuracy: 0.8651. | ||
|
||
// Convert the IDataView with predictions to a list of DatasetUtils.MulticlassClassificationExample objects. | ||
var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList(); | ||
|
||
// Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index | ||
// (e.g., 1) to its actual label (e.g., "AA"). | ||
// The metadata can be used to get all the unique labels used during training. | ||
var labelBuffer = new VBuffer<ReadOnlyMemory<char>>(); | ||
dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer); | ||
// nativeLabels is { "AA" , "BB", "CC", "DD" } | ||
var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex. | ||
|
||
|
||
// Show prediction result for the 3rd example. | ||
var nativePrediction = nativePredictions[2]; | ||
// Console output (the label is printed without surrounding quotes): | ||
// Our predicted label to this example is AA with probability 0.9257. | ||
Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + | ||
$"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); | ||
|
||
// Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. | ||
// Console output: | ||
// The probability of being class AA is 0.9257. | ||
// The probability of being class BB is 0.0739. | ||
// The probability of being class CC is 0.0002. | ||
// The probability of being class DD is 0.0001. | ||
for (int i = 0; i < nativeLabels.Length; ++i) | ||
Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); | ||
} | ||
} | ||
} |
96 changes: 96 additions & 0 deletions
96
.../Dynamic/Trainers/MulticlassClassification/LightGBMMulticlassClassificationWithOptions.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
using System; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.LightGBM; | ||
using Microsoft.ML.SamplesUtils; | ||
using static Microsoft.ML.LightGBM.Options; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample: trains a LightGBM multiclass classifier configured through an Options object (using DartBooster.Options) | ||
// on randomly generated examples, prints macro/micro accuracy on a test split, and shows how to map | ||
// predicted key indices back to labels. | ||
class LightGbmMulticlassClassificationWithOptions | ||
{ | ||
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>. | ||
public static void Example() | ||
{ | ||
// Create a general context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
// A fixed seed (0) is passed here. | ||
var mlContext = new MLContext(seed: 0); | ||
|
||
// Create in-memory examples as C# native class. | ||
var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); | ||
|
||
// Convert native C# class to IDataView, a consumable format to ML.NET functions. | ||
var dataView = mlContext.Data.ReadFromEnumerable(examples); | ||
|
||
//////////////////// Data Preview //////////////////// | ||
// Label Features | ||
// AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045 | ||
// BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918 | ||
// CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099 | ||
// DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455 | ||
|
||
// Create a pipeline. | ||
// - Convert the string labels into key types. | ||
// - Apply LightGbm multiclass trainer with advanced options (DartBooster with custom DropRate and XgboostDartMode). | ||
// - Map "PredictedLabel" to the key column "PredictedLabelIndex" and copy "Score" to "Scores"; | ||
// these are the names read back from the prediction objects below. | ||
var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label") | ||
.Append(mlContext.MulticlassClassification.Trainers.LightGbm(new Options | ||
{ | ||
LabelColumn = "LabelIndex", | ||
FeatureColumn = "Features", | ||
Booster = new DartBooster.Options | ||
{ | ||
DropRate = 0.15, | ||
XgboostDartMode = false | ||
} | ||
})) | ||
.Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel")) | ||
.Append(mlContext.Transforms.CopyColumns("Scores", "Score")); | ||
|
||
// Split the data into training and test sets (half each). Only the training set is used in fitting | ||
// the created pipeline. Metrics are computed on the test set. | ||
var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5); | ||
|
||
// Train the model. | ||
var model = pipeline.Fit(split.TrainSet); | ||
|
||
// Do prediction on the test set. | ||
var dataWithPredictions = model.Transform(split.TestSet); | ||
|
||
// Evaluate the trained model using the test set. | ||
var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex"); | ||
|
||
// Check if metrics are reasonable. | ||
Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}."); | ||
// Console output: | ||
// Macro accuracy: 0.8619, Micro accuracy: 0.8611. | ||
|
||
// Convert the IDataView with predictions to a list of DatasetUtils.MulticlassClassificationExample objects. | ||
var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList(); | ||
|
||
// Get schema object out of the prediction. It contains metadata such as the mapping from predicted label index | ||
// (e.g., 1) to its actual label (e.g., "AA"). | ||
// The metadata can be used to get all the unique labels used during training. | ||
var labelBuffer = new VBuffer<ReadOnlyMemory<char>>(); | ||
dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer); | ||
// nativeLabels is { "AA" , "BB", "CC", "DD" } | ||
var nativeLabels = labelBuffer.DenseValues().ToArray(); // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label indexed by nativePrediction.PredictedLabelIndex. | ||
|
||
|
||
// Show prediction result for the 3rd example. | ||
var nativePrediction = nativePredictions[2]; | ||
// Console output: | ||
// Our predicted label to this example is AA with probability 0.8986. | ||
Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " + | ||
$"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}."); | ||
|
||
// Scores and nativeLabels are two parallel attributes; that is, Scores[i] is the probability of being nativeLabels[i]. | ||
// Console output: | ||
// The probability of being class AA is 0.8986. | ||
// The probability of being class BB is 0.0961. | ||
// The probability of being class CC is 0.0050. | ||
// The probability of being class DD is 0.0003. | ||
for (int i = 0; i < nativeLabels.Length; ++i) | ||
Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}."); | ||
} | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGBMRegression.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
using System; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample: trains a LightGBM regressor on the housing dataset, prints two of the learned feature weights, | ||
// and prints the regression metrics computed on a held-out test split. | ||
class LightGbmRegression | ||
{ | ||
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>. | ||
public static void Example() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Download and load the housing dataset into an IDataView. | ||
var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); | ||
|
||
//////////////////// Data Preview //////////////////// | ||
// Only a subset of the columns (8) is displayed here. | ||
// MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s ... | ||
// 24.00 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 ... | ||
// 21.60 0.02731 00.00 7.070 0 0.4690 6.4210 78.90 ... | ||
// 34.70 0.02729 00.00 7.070 0 0.4690 7.1850 61.10 ... | ||
|
||
// Leave out 10% of data for testing. | ||
var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1); | ||
|
||
// Create the estimator, here we only need LightGbm trainer | ||
// as data is already processed in a form consumable by the trainer. | ||
// Every column except the label is concatenated into a single "Features" column. | ||
var labelName = "MedianHomeValue"; | ||
var featureNames = dataView.Schema | ||
.Select(column => column.Name) // Get the column names | ||
.Where(name => name != labelName) // Drop the Label | ||
.ToArray(); | ||
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) | ||
.Append(mlContext.Regression.Trainers.LightGbm( | ||
labelColumn: labelName, | ||
numLeaves: 4, | ||
minDataPerLeaf: 6, | ||
learningRate: 0.001)); | ||
|
||
// Fit this pipeline to the training data. | ||
var model = pipeline.Fit(split.TrainSet); | ||
|
||
// Get the feature importance based on the information gain used during training. | ||
// Indices into the weights follow the order of featureNames above. | ||
VBuffer<float> weights = default; | ||
model.LastTransformer.Model.GetFeatureWeights(ref weights); | ||
var weightsValues = weights.DenseValues().ToArray(); | ||
Console.WriteLine($"weight 0 - {weightsValues[0]}"); // CrimesPerCapita (weight 0) = 0.1898361 | ||
Console.WriteLine($"weight 5 - {weightsValues[5]}"); // RoomsPerDwelling (weight 5) = 1 | ||
|
||
// Evaluate how the model is doing on the test data. | ||
var dataWithPredictions = model.Transform(split.TestSet); | ||
var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
// Expected output: | ||
// L1: 4.97 | ||
// L2: 51.37 | ||
// LossFunction: 51.37 | ||
// RMS: 7.17 | ||
// RSquared: 0.08 | ||
} | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we add a comment noting that this sample requires the LightGBM NuGet package? #Resolved