Skip to content

Lockdown of Microsoft.ML.LightGBM public surface. #2476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Feb 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
59d1a08
Lockdown of Microsoft.ML.LightGBM public surface.
zeahmed Feb 8, 2019
97a2e59
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed Feb 11, 2019
a6c3357
Addressed reviewers' comments.
zeahmed Feb 12, 2019
01f4188
Addressed reviewers' comments.
zeahmed Feb 12, 2019
85d53d1
Addressed reviewers' comments and added more samples.
zeahmed Feb 12, 2019
3f91b54
Undo changes in LightGbmStaticExtension.cs
zeahmed Feb 12, 2019
9245470
Addressed reviewers' comments.
zeahmed Feb 13, 2019
95de1a1
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed Feb 13, 2019
49509c1
Merged with the base and resolved merge conflicts.
zeahmed Feb 13, 2019
3e27a88
Addressed reviewers' comments.
zeahmed Feb 13, 2019
6c8ac35
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed Feb 14, 2019
571575f
Addressed reviewers' comments.
zeahmed Feb 14, 2019
112636b
Merge remote-tracking branch 'upstream/master' into LightGBM_refact
zeahmed Feb 14, 2019
fc60edd
Created two parameters for Booster. One for EntryPoint and one for cm…
zeahmed Feb 14, 2019
69e51bf
Merge branch 'LightGBM_refact' of https://github.com/zeahmed/machinel…
zeahmed Feb 14, 2019
31ceabb
Making all the Booster classes sealed.
zeahmed Feb 14, 2019
0a56dc8
Resolved merge conflicts.
zeahmed Feb 14, 2019
1b54da4
Resolved conflicts in SamplesDatasetUtils.cs
zeahmed Feb 14, 2019
ed2ea83
Reverted changes related to making `ISupportBoosterParameterFactory` …
zeahmed Feb 14, 2019
d1d4109
Changed Arguments classes to Options.
zeahmed Feb 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using Microsoft.ML.Transforms.Categorical;

namespace Microsoft.ML.Samples.Dynamic
{
    public class LightGbmBinaryClassification
    {
        // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
        public static void Example()
        {
            // Create the ML.NET context shared by all components of the pipeline.
            var mlContext = new MLContext();

            // Download and featurize the adult income dataset.
            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

            // Hold out 10% of the data for evaluation.
            var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

            // Define the LightGBM binary classification estimator.
            var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features");

            // Train the model on the training portion.
            var model = pipeline.Fit(split.TrainSet);

            // Score the held-out test data.
            var scoredData = model.Transform(split.TestSet);

            // Compute and print the evaluation metrics.
            var metrics = mlContext.BinaryClassification.Evaluate(scoredData, "IsOver50K");
            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

            // Output:
            // Accuracy: 0.88
            // AUC: 0.93
            // F1 Score: 0.71
            // Negative Precision: 0.90
            // Negative Recall: 0.94
            // Positive Precision: 0.76
            // Positive Recall: 0.66
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
using Microsoft.ML.LightGBM;
using Microsoft.ML.Transforms.Categorical;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
{
    class LightGbmBinaryClassificationWithOptions
    {
        // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
        public static void Example()
        {
            // Create the ML.NET context shared by all components of the pipeline.
            var mlContext = new MLContext();

            // Download and featurize the adult income dataset.
            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

            // Hold out 10% of the data for evaluation.
            var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

            // Define the LightGBM estimator, this time configured through the
            // advanced Options object, including a GOSS booster.
            var trainerOptions = new Options
            {
                LabelColumn = "IsOver50K",
                FeatureColumn = "Features",
                Booster = new GossBooster.Options
                {
                    TopRate = 0.3,
                    OtherRate = 0.2
                }
            };
            var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(trainerOptions);

            // Train the model on the training portion.
            var model = pipeline.Fit(split.TrainSet);

            // Score the held-out test data.
            var scoredData = model.Transform(split.TestSet);

            // Compute and print the evaluation metrics.
            var metrics = mlContext.BinaryClassification.Evaluate(scoredData, "IsOver50K");
            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

            // Output:
            // Accuracy: 0.88
            // AUC: 0.93
            // F1 Score: 0.71
            // Negative Precision: 0.90
            // Negative Recall: 0.94
            // Positive Precision: 0.76
            // Positive Recall: 0.67
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
using System;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.SamplesUtils;

namespace Microsoft.ML.Samples.Dynamic
{
    class LightGbmMulticlassClassification
    {
        // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
        public static void Example()
        {
            // Create a general context for ML.NET operations. It can be used for
            // exception tracking and logging, as a catalog of available operations
            // and as the source of randomness.
            var mlContext = new MLContext();

            // Generate 1000 in-memory examples as a native C# class.
            var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);

            // Convert the native C# class to an IDataView, the format consumable by ML.NET functions.
            var dataView = mlContext.Data.ReadFromEnumerable(examples);

            //////////////////// Data Preview ////////////////////
            // Label    Features
            // AA       0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
            // BB       0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
            // CC       1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
            // DD       0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455

            // Build the pipeline:
            // - convert the string labels into key types,
            // - apply the LightGbm multiclass trainer,
            // - map predicted keys back to labels and copy the scores out.
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label")
                .Append(mlContext.MulticlassClassification.Trainers.LightGbm(labelColumn: "LabelIndex"))
                .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel"))
                .Append(mlContext.Transforms.CopyColumns("Scores", "Score"));

            // Split the data into training and test sets. Only the training set is
            // used for fitting; metrics are computed on the test set.
            var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);

            // Train the model.
            var model = pipeline.Fit(split.TrainSet);

            // Do prediction on the test set.
            var dataWithPredictions = model.Transform(split.TestSet);

            // Evaluate the trained model using the test set.
            var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex");

            // Check if metrics are reasonable.
            Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}.");
            // Console output:
            // Macro accuracy: 0.8655, Micro accuracy: 0.8651.

            // Materialize the predictions back into the native example class.
            var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList();

            // The schema of the prediction carries metadata mapping a predicted
            // label index (e.g., 1) to its actual label (e.g., "AA"); use it to
            // recover all unique labels seen during training.
            var labelBuffer = new VBuffer<ReadOnlyMemory<char>>();
            dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer);
            // nativeLabels is { "AA", "BB", "CC", "DD" };
            // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label for that prediction.
            var nativeLabels = labelBuffer.DenseValues().ToArray();

            // Show the prediction result for the 3rd example.
            var nativePrediction = nativePredictions[2];
            // Console output:
            // Our predicted label to this example is "AA" with probability 0.9257.
            Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " +
                $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}.");

            // Scores and nativeLabels are parallel: Scores[i] is the probability of being nativeLabels[i].
            // Console output:
            // The probability of being class "AA" is 0.9257.
            // The probability of being class "BB" is 0.0739.
            // The probability of being class "CC" is 0.0002.
            // The probability of being class "DD" is 0.0001.
            for (int i = 0; i < nativeLabels.Length; ++i)
                Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}.");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
using System;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.LightGBM;
using Microsoft.ML.SamplesUtils;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
{
    class LightGbmMulticlassClassificationWithOptions
    {
        // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
        public static void Example()
        {
            // Create a general context for ML.NET operations, seeded for
            // reproducibility. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext(seed: 0);

            // Generate 1000 in-memory examples as a native C# class.
            var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);

            // Convert the native C# class to an IDataView, the format consumable by ML.NET functions.
            var dataView = mlContext.Data.ReadFromEnumerable(examples);

            //////////////////// Data Preview ////////////////////
            // Label    Features
            // AA       0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045
            // BB       0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918
            // CC       1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099
            // DD       0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455

            // Build the pipeline:
            // - convert the string labels into key types,
            // - apply the LightGbm multiclass trainer configured through the
            //   advanced Options object, including a DART booster,
            // - map predicted keys back to labels and copy the scores out.
            var trainerOptions = new Options
            {
                LabelColumn = "LabelIndex",
                FeatureColumn = "Features",
                Booster = new DartBooster.Options
                {
                    DropRate = 0.15,
                    XgboostDartMode = false
                }
            };
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label")
                .Append(mlContext.MulticlassClassification.Trainers.LightGbm(trainerOptions))
                .Append(mlContext.Transforms.Conversion.MapValueToKey("PredictedLabelIndex", "PredictedLabel"))
                .Append(mlContext.Transforms.CopyColumns("Scores", "Score"));

            // Split the data into training and test sets. Only the training set is
            // used for fitting; metrics are computed on the test set.
            var split = mlContext.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.5);

            // Train the model.
            var model = pipeline.Fit(split.TrainSet);

            // Do prediction on the test set.
            var dataWithPredictions = model.Transform(split.TestSet);

            // Evaluate the trained model using the test set.
            var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions, label: "LabelIndex");

            // Check if metrics are reasonable.
            Console.WriteLine($"Macro accuracy: {metrics.AccuracyMacro:F4}, Micro accuracy: {metrics.AccuracyMicro:F4}.");
            // Console output:
            // Macro accuracy: 0.8619, Micro accuracy: 0.8611.

            // Materialize the predictions back into the native example class.
            var nativePredictions = mlContext.CreateEnumerable<DatasetUtils.MulticlassClassificationExample>(dataWithPredictions, false).ToList();

            // The schema of the prediction carries metadata mapping a predicted
            // label index (e.g., 1) to its actual label (e.g., "AA"); use it to
            // recover all unique labels seen during training.
            var labelBuffer = new VBuffer<ReadOnlyMemory<char>>();
            dataWithPredictions.Schema["PredictedLabelIndex"].GetKeyValues(ref labelBuffer);
            // nativeLabels is { "AA", "BB", "CC", "DD" };
            // nativeLabels[nativePrediction.PredictedLabelIndex - 1] is the original label for that prediction.
            var nativeLabels = labelBuffer.DenseValues().ToArray();

            // Show the prediction result for the 3rd example.
            var nativePrediction = nativePredictions[2];
            // Console output:
            // Our predicted label to this example is AA with probability 0.8986.
            Console.WriteLine($"Our predicted label to this example is {nativeLabels[(int)nativePrediction.PredictedLabelIndex - 1]} " +
                $"with probability {nativePrediction.Scores[(int)nativePrediction.PredictedLabelIndex - 1]:F4}.");

            // Scores and nativeLabels are parallel: Scores[i] is the probability of being nativeLabels[i].
            // Console output:
            // The probability of being class AA is 0.8986.
            // The probability of being class BB is 0.0961.
            // The probability of being class CC is 0.0050.
            // The probability of being class DD is 0.0003.
            for (int i = 0; i < nativeLabels.Length; ++i)
                Console.WriteLine($"The probability of being class {nativeLabels[i]} is {nativePrediction.Scores[i]:F4}.");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using System;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
    class LightGbmRegression
    {
        // This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
        public static void Example()
        {
            // Create a new ML context for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // Download and load the housing dataset into an IDataView.
            var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext);

            //////////////////// Data Preview ////////////////////
            /// Only the first columns are displayed here.
            // MedianHomeValue CrimesPerCapita PercentResidental PercentNonRetail CharlesRiver NitricOxides RoomsPerDwelling PercentPre40s ...
            // 24.00           0.00632         18.00             2.310            0            0.5380       6.5750           65.20         ...
            // 21.60           0.02731         00.00             7.070            0            0.4690       6.4210           78.90         ...
            // 34.70           0.02729         00.00             7.070            0            0.4690       7.1850           61.10         ...

            // Hold out 10% of the data for evaluation.
            var split = mlContext.Regression.TrainTestSplit(dataView, testFraction: 0.1);

            // Only the LightGbm trainer is needed here, as the data is already in
            // a form the trainer can consume. Every column except the label is a feature.
            var labelName = "MedianHomeValue";
            var featureNames = dataView.Schema
                .Select(column => column.Name)  // Get the column names
                .Where(name => name != labelName)  // Drop the Label
                .ToArray();
            var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
                .Append(mlContext.Regression.Trainers.LightGbm(
                    labelColumn: labelName,
                    numLeaves: 4,
                    minDataPerLeaf: 6,
                    learningRate: 0.001));

            // Fit this pipeline to the training data.
            var model = pipeline.Fit(split.TrainSet);

            // Get the feature importance based on the information gain used during training.
            VBuffer<float> weights = default;
            model.LastTransformer.Model.GetFeatureWeights(ref weights);
            var weightsValues = weights.DenseValues().ToArray();
            Console.WriteLine($"weight 0 - {weightsValues[0]}");  // CrimesPerCapita (weight 0) = 0.1898361
            Console.WriteLine($"weight 5 - {weightsValues[5]}");  // RoomsPerDwelling (weight 5) = 1

            // Evaluate how the model is doing on the test data.
            var dataWithPredictions = model.Transform(split.TestSet);
            var metrics = mlContext.Regression.Evaluate(dataWithPredictions, label: labelName);
            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

            // Output
            // L1: 4.97
            // L2: 51.37
            // LossFunction: 51.37
            // RMS: 7.17
            // RSquared: 0.08
        }
    }
}
Loading