-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Added OneVersusAll and PairwiseCoupling samples. #3159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5a17e9b
01dccde
391969f
f08e9bb
209877d
9f2bf05
f36302a
5add22d
d8d7e98
4cf6199
589b0a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
<# if (TrainerOptions != null) { #> | ||
<#=OptionsInclude#> | ||
<# } #> | ||
|
||
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification | ||
{ | ||
public static class <#=ClassName#> | ||
{<#=Comments#> | ||
public static void Example() | ||
{ | ||
// Create a new context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
// Setting the seed to a fixed number in this example to make outputs deterministic. | ||
var mlContext = new MLContext(seed: 0); | ||
|
||
// Create a list of training data points. | ||
var dataPoints = GenerateRandomDataPoints(1000); | ||
|
||
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API. | ||
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); | ||
|
||
<# if (MetaTrainer != null) { #> | ||
// Define the trainer. | ||
var pipeline = | ||
// Convert the numeric (uint) labels into key types. | ||
mlContext.Transforms.Conversion.MapValueToKey("Label") | ||
// Apply <#=MetaTrainer#> multiclass meta trainer on top of binary trainer. | ||
.Append(mlContext.MulticlassClassification.Trainers.<#=MetaTrainer#>(<#=Trainer#>())); | ||
<# } else if (TrainerOptions == null) { #> | ||
// Define the trainer. | ||
var pipeline = | ||
// Convert the numeric (uint) labels into key types. | ||
mlContext.Transforms.Conversion.MapValueToKey("Label") | ||
// Apply <#=Trainer#> multiclass trainer. | ||
.Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>()); | ||
<# } else { #> | ||
// Define trainer options. | ||
var options = new <#=TrainerOptions#>; | ||
|
||
// Define the trainer. | ||
var pipeline = mlContext.MulticlassClassification.Trainers.<#=Trainer#>(options); | ||
<# } #> | ||
|
||
// Train the model. | ||
var model = pipeline.Fit(trainingData); | ||
|
||
// Create testing data. Use different random seed to make it different from training data. | ||
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); | ||
|
||
// Run the model on test data set. | ||
var transformedTestData = model.Transform(testData); | ||
|
||
// Convert IDataView object to a list. | ||
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); | ||
|
||
// Look at 5 predictions | ||
foreach (var p in predictions.Take(5)) | ||
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add ExpectedOutputPerInstance after this #Resolved |
||
// Evaluate the overall metrics | ||
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
<#=ExpectedOutput#> | ||
} | ||
|
||
// Lazily produces `count` synthetic data points from a Random seeded with `seed`. | ||
// Each point gets a label drawn from {0, 1, 2} and 20 features, each a random value | ||
// in [0, 1) shifted upwards by label * 0.2 so that features correlate with the label. | ||
// This is an iterator (yield return): nothing is generated until the result is enumerated. | ||
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) | ||
{ | ||
var random = new Random(seed); | ||
// Local helper: next random double narrowed to float. | ||
float randomFloat() => (float)random.NextDouble(); | ||
for (int i = 0; i < count; i++) | ||
{ | ||
// Generate Labels that are integers 0, 1 or 2 | ||
var label = random.Next(3); | ||
yield return new DataPoint | ||
{ | ||
Label = (uint)label, | ||
// Create random features that are correlated with the label. | ||
// The feature values are slightly increased by adding a constant multiple of label. | ||
// Repeat(label, 20) only fixes the vector length at 20; the repeated value x is ignored by the selector. | ||
Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() | ||
}; | ||
} | ||
} | ||
|
||
// Example with label and 20 feature values. A data set is a collection of such examples. | ||
// Instances are produced by GenerateRandomDataPoints and loaded through LoadFromEnumerable. | ||
private class DataPoint | ||
{ | ||
// Class label; the generator assigns 0, 1 or 2. | ||
public uint Label { get; set; } | ||
// Feature vector annotated with a fixed length of 20, matching the 20 values the generator emits. | ||
[VectorType(20)] | ||
public float[] Features { get; set; } | ||
} | ||
|
||
// Class used to capture predictions. | ||
// It is the row type passed to CreateEnumerable<Prediction> when reading the transformed test data; | ||
// only the two values printed by Example() are captured. | ||
private class Prediction | ||
{ | ||
// Original label, as it appears in the transformed data. | ||
public uint Label { get; set; } | ||
// Predicted label from the trainer. | ||
public uint PredictedLabel { get; set; } | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification | ||
{ | ||
public static class NaiveBayes | ||
{ | ||
public static void Example() | ||
{ | ||
// Create a new context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
// Setting the seed to a fixed number in this example to make outputs deterministic. | ||
var mlContext = new MLContext(seed: 0); | ||
|
||
// Create a list of training data points. | ||
var dataPoints = GenerateRandomDataPoints(1000); | ||
|
||
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API. | ||
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); | ||
|
||
// Define the trainer. | ||
var pipeline = | ||
// Convert the numeric (uint) labels into key types. | ||
mlContext.Transforms.Conversion.MapValueToKey("Label") | ||
// Apply NaiveBayes multiclass trainer. | ||
.Append(mlContext.MulticlassClassification.Trainers.NaiveBayes()); | ||
|
||
// Train the model. | ||
var model = pipeline.Fit(trainingData); | ||
|
||
// Create testing data. Use different random seed to make it different from training data. | ||
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); | ||
|
||
// Run the model on test data set. | ||
var transformedTestData = model.Transform(testData); | ||
|
||
// Convert IDataView object to a list. | ||
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); | ||
|
||
// Look at 5 predictions | ||
foreach (var p in predictions.Take(5)) | ||
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); | ||
|
||
// Evaluate the overall metrics | ||
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
// Expected output: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
how come this line is repeated? #Resolved |
||
// Expected output: | ||
// Micro Accuracy: 0.35 | ||
// Macro Accuracy: 0.33 | ||
// Log Loss: 34.54 | ||
// Log Loss Reduction: -30.47 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we usually indent the lines below Expected output with an extra space. #Resolved |
||
} | ||
|
||
// Lazily produces `count` synthetic data points from a Random seeded with `seed`. | ||
// Each point gets a label drawn from {0, 1, 2} and 20 features, each a random value | ||
// in [0, 1) shifted upwards by label * 0.2 so that features correlate with the label. | ||
// This is an iterator (yield return): nothing is generated until the result is enumerated. | ||
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) | ||
{ | ||
var random = new Random(seed); | ||
// Local helper: next random double narrowed to float. | ||
float randomFloat() => (float)random.NextDouble(); | ||
for (int i = 0; i < count; i++) | ||
{ | ||
// Generate Labels that are integers 0, 1 or 2 | ||
var label = random.Next(3); | ||
yield return new DataPoint | ||
{ | ||
Label = (uint)label, | ||
// Create random features that are correlated with the label. | ||
// The feature values are slightly increased by adding a constant multiple of label. | ||
// Repeat(label, 20) only fixes the vector length at 20; the repeated value x is ignored by the selector. | ||
Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() | ||
}; | ||
} | ||
} | ||
|
||
// Example with label and 20 feature values. A data set is a collection of such examples. | ||
private class DataPoint | ||
{ | ||
public uint Label { get; set; } | ||
[VectorType(20)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
is the annotation necessary? #ByDesign There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
public float[] Features { get; set; } | ||
} | ||
|
||
// Class used to capture predictions. | ||
// It is the row type passed to CreateEnumerable<Prediction> when reading the transformed test data; | ||
// only the two values printed by Example() are captured. | ||
private class Prediction | ||
{ | ||
// Original label, as it appears in the transformed data. | ||
public uint Label { get; set; } | ||
// Predicted label from the trainer. | ||
public uint PredictedLabel { get; set; } | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<#@ include file="MulticlassClassification.ttinclude"#> | ||
<#+ | ||
string ClassName="NaiveBayes"; | ||
string Trainer = "NaiveBayes"; | ||
string MetaTrainer = null; | ||
string TrainerOptions = null; | ||
|
||
string OptionsInclude = ""; | ||
string Comments= ""; | ||
|
||
string ExpectedOutputPerInstance= @"// Expected output: | ||
// Label: 1, Prediction: 2 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
how come generated labels are 0,1,2 but here I see 1,2,3. how did they get changed? #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
// Label: 2, Prediction: 2 | ||
// Label: 3, Prediction: 2 | ||
// Label: 2, Prediction: 2 | ||
// Label: 3, Prediction: 2"; | ||
|
||
string ExpectedOutput = @"// Expected output: | ||
// Expected output: | ||
// Micro Accuracy: 0.35 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
can we get something above 60%? this is much worse than the other two. #ByDesign There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
// Macro Accuracy: 0.33 | ||
// Log Loss: 34.54 | ||
// Log Loss Reduction: -30.47"; | ||
#> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification | ||
{ | ||
public static class OneVersusAll | ||
{ | ||
public static void Example() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
you probably want to link this file to extension method.
|
||
{ | ||
// Create a new context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
// Setting the seed to a fixed number in this example to make outputs deterministic. | ||
var mlContext = new MLContext(seed: 0); | ||
|
||
// Create a list of training data points. | ||
var dataPoints = GenerateRandomDataPoints(1000); | ||
|
||
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API. | ||
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); | ||
|
||
// Define the trainer. | ||
var pipeline = | ||
// Convert the numeric (uint) labels into key types. | ||
mlContext.Transforms.Conversion.MapValueToKey("Label") | ||
// Apply OneVersusAll multiclass meta trainer on top of binary trainer. | ||
.Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. usually multiclass pipelines add a MapKeyToValue at the end. #ByDesign There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
// Train the model. | ||
var model = pipeline.Fit(trainingData); | ||
|
||
// Create testing data. Use different random seed to make it different from training data. | ||
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); | ||
|
||
// Run the model on test data set. | ||
var transformedTestData = model.Transform(testData); | ||
|
||
// Convert IDataView object to a list. | ||
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList(); | ||
|
||
// Look at 5 predictions | ||
foreach (var p in predictions.Take(5)) | ||
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); | ||
|
||
// Evaluate the overall metrics | ||
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); | ||
SamplesUtils.ConsoleUtils.PrintMetrics(metrics); | ||
|
||
// Expected output: | ||
// Micro Accuracy: 0.90 | ||
// Macro Accuracy: 0.90 | ||
// Log Loss: 0.37 | ||
// Log Loss Reduction: 0.67 | ||
} | ||
|
||
// Lazily produces `count` synthetic data points from a Random seeded with `seed`. | ||
// Each point gets a label drawn from {0, 1, 2} and 20 features, each a random value | ||
// in [0, 1) shifted upwards by label * 0.2 so that features correlate with the label. | ||
// This is an iterator (yield return): nothing is generated until the result is enumerated. | ||
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0) | ||
{ | ||
var random = new Random(seed); | ||
// Local helper: next random double narrowed to float. | ||
float randomFloat() => (float)random.NextDouble(); | ||
for (int i = 0; i < count; i++) | ||
{ | ||
// Generate Labels that are integers 0, 1 or 2 | ||
var label = random.Next(3); | ||
yield return new DataPoint | ||
{ | ||
Label = (uint)label, | ||
// Create random features that are correlated with the label. | ||
// The feature values are slightly increased by adding a constant multiple of label. | ||
// Repeat(label, 20) only fixes the vector length at 20; the repeated value x is ignored by the selector. | ||
Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() | ||
}; | ||
} | ||
} | ||
|
||
// Example with label and 20 feature values. A data set is a collection of such examples. | ||
// Instances are produced by GenerateRandomDataPoints and loaded through LoadFromEnumerable. | ||
private class DataPoint | ||
{ | ||
// Class label; the generator assigns 0, 1 or 2. | ||
public uint Label { get; set; } | ||
// Feature vector annotated with a fixed length of 20, matching the 20 values the generator emits. | ||
[VectorType(20)] | ||
public float[] Features { get; set; } | ||
} | ||
|
||
// Class used to capture predictions. | ||
// It is the row type passed to CreateEnumerable<Prediction> when reading the transformed test data; | ||
// only the two values printed by Example() are captured. | ||
private class Prediction | ||
{ | ||
// Original label, as it appears in the transformed data. | ||
public uint Label { get; set; } | ||
// Predicted label from the trainer. | ||
public uint PredictedLabel { get; set; } | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<#@ include file="MulticlassClassification.ttinclude"#> | ||
<#+ | ||
// Settings for generating the OneVersusAll sample; presumably read by the included | ||
// MulticlassClassification.ttinclude template (confirm against that file). | ||
string ClassName="OneVersusAll"; | ||
// Binary trainer expression that the meta trainer wraps. | ||
string Trainer = "mlContext.BinaryClassification.Trainers.SdcaLogisticRegression"; | ||
string MetaTrainer = "OneVersusAll"; | ||
// null means the sample is generated without a trainer-options object. | ||
string TrainerOptions = null; | ||
|
||
string OptionsInclude = ""; | ||
string Comments= ""; | ||
|
||
string ExpectedOutputPerInstance= @"// Expected output: | ||
// Label: 1, Prediction: 1 | ||
// Label: 2, Prediction: 2 | ||
// Label: 3, Prediction: 2 | ||
// Label: 2, Prediction: 2 | ||
// Label: 3, Prediction: 2"; | ||
|
||
// The header line appears once only; a second copy would be emitted verbatim into the generated sample. | ||
string ExpectedOutput = @"// Expected output: | ||
// Micro Accuracy: 0.90 | ||
// Macro Accuracy: 0.90 | ||
// Log Loss: 0.37 | ||
// Log Loss Reduction: 0.67"; | ||
#> |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please drop Microsoft.ML prefix as per #3205 #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
In reply to: 272754353 [](ancestors = 272754353)