diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude new file mode 100644 index 0000000000..0868f1be13 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -0,0 +1,110 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; +<# if (TrainerOptions != null) { #> +<#=OptionsInclude#> +<# } #> + +namespace Samples.Dynamic.Trainers.MulticlassClassification +{ + public static class <#=ClassName#> + {<#=Comments#> + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + +<# if (MetaTrainer != null) { #> + // Define the trainer. + var pipeline = + // Convert the integer labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=MetaTrainer#> multiclass meta trainer on top of binary trainer. + .Append(mlContext.MulticlassClassification.Trainers.<#=MetaTrainer#>(<#=Trainer#>())); +<# } else if (TrainerOptions == null) { #> + // Define the trainer. + var pipeline = + // Convert the integer labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=Trainer#> multiclass trainer. 
+ .Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>()); +<# } else { #> + // Define trainer options. + var options = new <#=TrainerOptions#>; + + // Define the trainer, converting the integer labels into key types first. + var pipeline = mlContext.Transforms.Conversion.MapValueToKey("Label").Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>(options)); +<# } #> + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. + var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + <#=ExpectedOutputPerInstance#> + + // Evaluate the overall metrics + var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + ConsoleUtils.PrintMetrics(metrics); + + <#=ExpectedOutput#> + } + + private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + // Generate Labels that are integers 1, 2 or 3 + var label = random.Next(1, 4); + yield return new DataPoint + { + Label = (uint)label, + // Create random features that are correlated with the label. + // The feature values are slightly increased by adding a constant multiple of label. + Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + }; + } + } + + // Example with label and 20 feature values. A data set is a collection of such examples. 
+ private class DataPoint + { + public uint Label { get; set; } + [VectorType(20)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public uint Label { get; set; } + // Predicted label from the trainer. + public uint PredictedLabel { get; set; } + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs new file mode 100644 index 0000000000..49047b8422 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; + +namespace Samples.Dynamic.Trainers.MulticlassClassification +{ + public static class OneVersusAll + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + + // Define the trainer. + var pipeline = + // Convert the integer labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply OneVersusAll multiclass meta trainer on top of binary trainer. + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + + // Train the model. 
+ var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. + var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + // Expected output: + // Label: 1, Prediction: 1 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + + // Evaluate the overall metrics + var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Micro Accuracy: 0.90 + // Macro Accuracy: 0.90 + // Log Loss: 0.37 + // Log Loss Reduction: 0.67 + } + + private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + // Generate Labels that are integers 1, 2 or 3 + var label = random.Next(1, 4); + yield return new DataPoint + { + Label = (uint)label, + // Create random features that are correlated with the label. + // The feature values are slightly increased by adding a constant multiple of label. + Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + }; + } + } + + // Example with label and 20 feature values. A data set is a collection of such examples. + private class DataPoint + { + public uint Label { get; set; } + [VectorType(20)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. 
+ public uint Label { get; set; } + // Predicted label from the trainer. + public uint PredictedLabel { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt new file mode 100644 index 0000000000..602c8c1497 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt @@ -0,0 +1,23 @@ +<#@ include file="MulticlassClassification.ttinclude"#> +<#+ +string ClassName="OneVersusAll"; +string Trainer = "mlContext.BinaryClassification.Trainers.SdcaLogisticRegression"; +string MetaTrainer = "OneVersusAll"; +string TrainerOptions = null; + +string OptionsInclude = ""; +string Comments= ""; + +string ExpectedOutputPerInstance= @"// Expected output: + // Label: 1, Prediction: 1 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2"; + +string ExpectedOutput = @"// Expected output: + // Micro Accuracy: 0.90 + // Macro Accuracy: 0.90 + // Log Loss: 0.37 + // Log Loss Reduction: 0.67"; +#> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs new file mode 100644 index 0000000000..07f5fe5645 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; + +namespace Samples.Dynamic.Trainers.MulticlassClassification +{ + public static class PairwiseCoupling + { + public static void Example() + { + // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + + // Define the trainer. + var pipeline = + // Convert the integer labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply PairwiseCoupling multiclass meta trainer on top of binary trainer. + .Append(mlContext.MulticlassClassification.Trainers.PairwiseCoupling(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. 
+ var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + // Expected output: + // Label: 1, Prediction: 1 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + + // Evaluate the overall metrics + var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Micro Accuracy: 0.90 + // Macro Accuracy: 0.90 + // Log Loss: 0.37 + // Log Loss Reduction: 0.67 + } + + private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + // Generate Labels that are integers 1, 2 or 3 + var label = random.Next(1, 4); + yield return new DataPoint + { + Label = (uint)label, + // Create random features that are correlated with the label. + // The feature values are slightly increased by adding a constant multiple of label. + Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + }; + } + } + + // Example with label and 20 feature values. A data set is a collection of such examples. + private class DataPoint + { + public uint Label { get; set; } + [VectorType(20)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public uint Label { get; set; } + // Predicted label from the trainer. 
+ public uint PredictedLabel { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt new file mode 100644 index 0000000000..7fb3f9cb99 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt @@ -0,0 +1,23 @@ +<#@ include file="MulticlassClassification.ttinclude"#> +<#+ +string ClassName="PairwiseCoupling"; +string Trainer = "mlContext.BinaryClassification.Trainers.SdcaLogisticRegression"; +string MetaTrainer = "PairwiseCoupling"; +string TrainerOptions = null; + +string OptionsInclude = ""; +string Comments= ""; + +string ExpectedOutputPerInstance= @"// Expected output: + // Label: 1, Prediction: 1 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2 + // Label: 2, Prediction: 2 + // Label: 3, Prediction: 2"; + +string ExpectedOutput = @"// Expected output: + // Micro Accuracy: 0.90 + // Macro Accuracy: 0.90 + // Log Loss: 0.37 + // Log Loss Reduction: 0.67"; +#> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 8d83cc95bd..87fe28a7db 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -90,6 +90,14 @@ TextTemplatingFileGenerator LbfgsLogisticRegression.cs + + PairwiseCoupling.cs + TextTemplatingFileGenerator + + + OneVersusAll.cs + TextTemplatingFileGenerator + OnlineGradientDescent.cs TextTemplatingFileGenerator @@ -143,6 +151,16 @@ True LbfgsLogisticRegressionWithOptions.tt + + True + True + PairwiseCoupling.tt + + + True + True + OneVersusAll.tt + True True diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index 
76b963d6cf..8c89d5ce43 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -700,6 +700,12 @@ private static ICalibratorTrainer GetCalibratorTrainerOrThrow(IExceptionContext /// Number of instances to train the calibrator. /// Use probabilities (vs. raw outputs) to identify top-score category. /// The type of the model. This type parameter will usually be inferred automatically from . + /// + /// + /// + /// public static OneVersusAllTrainer OneVersusAll(this MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, ITrainerEstimator, TModel> binaryEstimator, string labelColumnName = DefaultColumnNames.Label, @@ -733,6 +739,12 @@ public static OneVersusAllTrainer OneVersusAll(this MulticlassClassifica /// Whether to treat missing labels as having negative labels, instead of keeping them missing. /// Number of instances to train the calibrator. /// The type of the model. This type parameter will usually be inferred automatically from . + /// + /// + /// + /// public static PairwiseCouplingTrainer PairwiseCoupling(this MulticlassClassificationCatalog.MulticlassClassificationTrainers catalog, ITrainerEstimator, TModel> binaryEstimator, string labelColumnName = DefaultColumnNames.Label,