Adding sample for LightGbm ranking #2729

Merged: 16 commits, Feb 26, 2019
@@ -1,8 +1,8 @@
using Microsoft.ML.Transforms.Categorical;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public class LightGbmBinaryClassification
public class LightGbm
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -17,25 +17,25 @@ public static void Example()
var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);

// Create the Estimator.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features");
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();

// Fit this Pipeline to the Training Data.
var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.66
// Expected output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.66
}
}
}
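As a quick cross-check of the metrics printed above: the F1 score is the harmonic mean of the positive precision and positive recall, so the expected output is internally consistent. A minimal editorial sketch (not part of this change; the class and variable names are illustrative):

using System;

class F1Check
{
    static void Main()
    {
        // Positive precision and recall taken from the expected output above.
        double positivePrecision = 0.76, positiveRecall = 0.66;
        double f1 = 2 * positivePrecision * positiveRecall / (positivePrecision + positiveRecall);
        Console.WriteLine(f1.ToString("0.00"));  // 0.71, matching the reported F1 Score
    }
}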
@@ -1,10 +1,9 @@
using Microsoft.ML.LightGBM;
using Microsoft.ML.Transforms.Categorical;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
class LightGbmBinaryClassificationWithOptions
class LightGbmWithOptions
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -22,8 +21,6 @@ public static void Example()
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(
new Options
{
LabelColumn = "IsOver50K",
FeatureColumn = "Features",
Booster = new GossBooster.Options
{
TopRate = 0.3,
@@ -37,17 +34,17 @@
// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.67
// Expected output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.67
}
}
}
@@ -3,7 +3,7 @@
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SDCALogisticRegression
{
@@ -2,7 +2,7 @@
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SDCASupportVectorMachine
{
@@ -1,4 +1,4 @@
namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SymbolicStochasticGradientDescent
{
@@ -24,15 +24,17 @@ public static void Example()

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
// Accuracy: 0.85
// AUC: 0.90
// F1 Score: 0.64
// Negative Precision: 0.88
// Negative Recall: 0.93
// Positive Precision: 0.72
// Positive Recall: 0.58

// Expected output:
// Accuracy: 0.85
// AUC: 0.90
// F1 Score: 0.64
// Negative Precision: 0.88
// Negative Recall: 0.93
// Positive Precision: 0.72
// Positive Recall: 0.58
}
}
}
@@ -1,4 +1,4 @@
namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SymbolicStochasticGradientDescentWithOptions
{
@@ -22,7 +22,6 @@ public static void Example()
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
{
LabelColumn = "IsOver50K",
LearningRate = 0.2f,
NumberOfIterations = 10,
NumberOfThreads = 1,
@@ -33,15 +32,17 @@

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
// Accuracy: 0.84
// AUC: 0.88
// F1 Score: 0.60
// Negative Precision: 0.87
// Negative Recall: 0.93
// Positive Precision: 0.69
// Positive Recall: 0.53

// Expected output:
// Accuracy: 0.84
// AUC: 0.88
// F1 Score: 0.60
// Negative Precision: 0.87
// Negative Recall: 0.93
// Positive Precision: 0.69
// Positive Recall: 0.53
}
}
}
@@ -3,9 +3,9 @@
using Microsoft.ML.Data;
using Microsoft.ML.SamplesUtils;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification
{
class LightGbmMulticlassClassification
class LightGbm
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -5,9 +5,9 @@
using Microsoft.ML.SamplesUtils;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification
{
class LightGbmMulticlassClassificationWithOptions
class LightGbmWithOptions
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -0,0 +1,42 @@
using Microsoft.ML;

namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking
{
public class LightGbm
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
{
// Creating the ML.Net IHostEnvironment object, needed for the pipeline.
var mlContext = new MLContext();

// Download and featurize the dataset.
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext);

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
numLeaves: 4,
minDataPerLeaf: 10,
learningRate: 0.1,
numBoostRound: 2);

// Fit this Pipeline to the Training Data.
var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.Ranking.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Expected output:
// DCG: @1:1.71, @2:3.88, @3:7.93
// NDCG: @1:7.98, @2:12.14, @3:16.62
}
}
}
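For readers unfamiliar with the ranking metrics in the expected output, DCG@k sums a graded relevance gain discounted by rank position. A minimal editorial sketch of the common formulation (the exponential gain and log2 discount are an assumption here; this PR does not show the exact convention used by mlContext.Ranking.Evaluate):

using System;
using System.Linq;

static class DcgSketch
{
    // Relevance labels are listed in the order the model ranked the documents.
    // Gain is (2^rel - 1); the discount is log2(position + 1), with positions starting at 1.
    public static double DcgAt(int k, int[] relevances) =>
        relevances.Take(k)
                  .Select((rel, pos) => (Math.Pow(2, rel) - 1) / Math.Log(pos + 2, 2))
                  .Sum();
}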
@@ -0,0 +1,50 @@
using Microsoft.ML.LightGBM;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking
{
public class LightGbmWithOptions
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
{
// Creating the ML.Net IHostEnvironment object, needed for the pipeline.
var mlContext = new MLContext();

// Download and featurize the train and validation datasets.
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext);

// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
new Options
{
NumLeaves = 4,
MinDataPerLeaf = 10,
LearningRate = 0.1,
NumBoostRound = 2,
Booster = new TreeBooster.Options
{
FeatureFraction = 0.9
}
});

// Fit this pipeline to the training Data.
var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.Ranking.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Expected output:
// DCG: @1:1.71, @2:3.88, @3:7.93
// NDCG: @1:7.98, @2:12.14, @3:16.62
}
}
}
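NDCG@k, also shown in the expected output, normalizes DCG@k by the ideal DCG@k obtained when the same relevance labels are sorted in descending order. Continuing the editorial sketch from above (how the evaluator scales the reported numbers is not shown in this change):

using System.Linq;

static class NdcgSketch
{
    // Divides the observed DCG@k by the best achievable DCG@k for the same labels.
    public static double NdcgAt(int k, int[] rankedRelevances) =>
        DcgSketch.DcgAt(k, rankedRelevances) /
        DcgSketch.DcgAt(k, rankedRelevances.OrderByDescending(r => r).ToArray());
}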
@@ -3,7 +3,7 @@
using Microsoft.ML.Data;
using static Microsoft.ML.SamplesUtils.DatasetUtils;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.Recommendation
{
public static class MatrixFactorization
{
@@ -4,7 +4,7 @@
using Microsoft.ML.Trainers;
using static Microsoft.ML.SamplesUtils.DatasetUtils;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.Recommendation
{
public static class MatrixFactorizationWithOptions
{
@@ -2,9 +2,9 @@
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression
{
class LightGbmRegression
class LightGbm
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -54,12 +54,12 @@ public static void Example()
var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output
// L1: 4.97
// L2: 51.37
// LossFunction: 51.37
// RMS: 7.17
// RSquared: 0.08
// Expected output
// L1: 4.97
// L2: 51.37
// LossFunction: 51.37
// RMS: 7.17
// RSquared: 0.08
}
}
}
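The regression metrics in the expected output are related: L2 is the mean squared error and RMS is its square root, so the two values can be sanity-checked against each other. A small editorial sketch using the numbers above:

using System;

class RegressionMetricCheck
{
    static void Main()
    {
        // L2 (mean squared error) from the expected output above.
        double l2 = 51.37;
        double rms = Math.Sqrt(l2);
        Console.WriteLine(rms.ToString("0.00"));  // 7.17, matching the reported RMS
    }
}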
@@ -4,9 +4,9 @@
using Microsoft.ML.LightGBM;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression
{
class LightGbmRegressionWithOptions
class LightGbmWithOptions
{
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
@@ -64,12 +64,12 @@ public static void Example()
var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output
// L1: 4.97
// L2: 51.37
// LossFunction: 51.37
// RMS: 7.17
// RSquared: 0.08
// Expected output
// L1: 4.97
// L2: 51.37
// LossFunction: 51.37
// RMS: 7.17
// RSquared: 0.08
}
}
}