Skip to content

Commit f19b560

Browse files
authored
Added OneVersusAll and PairwiseCoupling samples. (#3159)
* OVA sample * Add PairwiseCoupling sample * Use tt templates * Add example link to extension methods. Add NaiveBayes * fix comments * fix comments * fix comments * Add [BestFriend] for GraphRunner * rollback BestFriend * fix comments * remove NB for now
1 parent fc89745 commit f19b560

File tree

7 files changed

+388
-0
lines changed

7 files changed

+388
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.SamplesUtils;
7+
<# if (TrainerOptions != null) { #>
8+
<#=OptionsInclude#>
9+
<# } #>
10+
11+
namespace Samples.Dynamic.Trainers.MulticlassClassification
12+
{
13+
public static class <#=ClassName#>
14+
{<#=Comments#>
15+
public static void Example()
16+
{
17+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
18+
// as a catalog of available operations and as the source of randomness.
19+
// Setting the seed to a fixed number in this example to make outputs deterministic.
20+
var mlContext = new MLContext(seed: 0);
21+
22+
// Create a list of training data points.
23+
var dataPoints = GenerateRandomDataPoints(1000);
24+
25+
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
26+
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
27+
28+
<# if (MetaTrainer != null) { #>
29+
// Define the trainer.
30+
var pipeline =
31+
// Convert the numeric labels into key types.
32+
mlContext.Transforms.Conversion.MapValueToKey("Label")
33+
// Apply <#=MetaTrainer#> multiclass meta trainer on top of binary trainer.
34+
.Append(mlContext.MulticlassClassification.Trainers.<#=MetaTrainer#>(<#=Trainer#>()));
35+
<# } else if (TrainerOptions == null) { #>
36+
// Define the trainer.
37+
var pipeline =
38+
// Convert the numeric labels into key types.
39+
mlContext.Transforms.Conversion.MapValueToKey("Label")
40+
// Apply <#=Trainer#> multiclass trainer.
41+
.Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>());
42+
<# } else { #>
43+
// Define trainer options.
44+
var options = new <#=TrainerOptions#>;
45+
46+
// Define the trainer.
47+
var pipeline = mlContext.MulticlassClassification.Trainers.<#=Trainer#>(options);
48+
<# } #>
49+
50+
// Train the model.
51+
var model = pipeline.Fit(trainingData);
52+
53+
// Create testing data. Use different random seed to make it different from training data.
54+
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
55+
56+
// Run the model on test data set.
57+
var transformedTestData = model.Transform(testData);
58+
59+
// Convert IDataView object to a list.
60+
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
61+
62+
// Look at 5 predictions
63+
foreach (var p in predictions.Take(5))
64+
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
65+
66+
<#=ExpectedOutputPerInstance#>
67+
68+
// Evaluate the overall metrics
69+
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);
70+
ConsoleUtils.PrintMetrics(metrics);
71+
72+
<#=ExpectedOutput#>
73+
}
74+
75+
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
76+
{
77+
var random = new Random(seed);
78+
float randomFloat() => (float)random.NextDouble();
79+
for (int i = 0; i < count; i++)
80+
{
81+
// Generate Labels that are integers 1, 2 or 3
82+
var label = random.Next(1, 4);
83+
yield return new DataPoint
84+
{
85+
Label = (uint)label,
86+
// Create random features that are correlated with the label.
87+
// The feature values are slightly increased by adding a constant multiple of label.
88+
Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray()
89+
};
90+
}
91+
}
92+
93+
// Example with label and 20 feature values. A data set is a collection of such examples.
94+
private class DataPoint
95+
{
96+
public uint Label { get; set; }
97+
[VectorType(20)]
98+
public float[] Features { get; set; }
99+
}
100+
101+
// Class used to capture predictions.
102+
private class Prediction
103+
{
104+
// Original label.
105+
public uint Label { get; set; }
106+
// Predicted label from the trainer.
107+
public uint PredictedLabel { get; set; }
108+
}
109+
}
110+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.SamplesUtils;
7+
8+
namespace Samples.Dynamic.Trainers.MulticlassClassification
9+
{
10+
public static class OneVersusAll
11+
{
12+
public static void Example()
13+
{
14+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
15+
// as a catalog of available operations and as the source of randomness.
16+
// Setting the seed to a fixed number in this example to make outputs deterministic.
17+
var mlContext = new MLContext(seed: 0);
18+
19+
// Create a list of training data points.
20+
var dataPoints = GenerateRandomDataPoints(1000);
21+
22+
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
23+
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
24+
25+
// Define the trainer.
26+
var pipeline =
27+
// Convert the numeric labels into key types.
28+
mlContext.Transforms.Conversion.MapValueToKey("Label")
29+
// Apply OneVersusAll multiclass meta trainer on top of binary trainer.
30+
.Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression()));
31+
32+
// Train the model.
33+
var model = pipeline.Fit(trainingData);
34+
35+
// Create testing data. Use different random seed to make it different from training data.
36+
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
37+
38+
// Run the model on test data set.
39+
var transformedTestData = model.Transform(testData);
40+
41+
// Convert IDataView object to a list.
42+
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
43+
44+
// Look at 5 predictions
45+
foreach (var p in predictions.Take(5))
46+
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
47+
48+
// Expected output:
49+
// Label: 1, Prediction: 1
50+
// Label: 2, Prediction: 2
51+
// Label: 3, Prediction: 2
52+
// Label: 2, Prediction: 2
53+
// Label: 3, Prediction: 2
54+
55+
// Evaluate the overall metrics
56+
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);
57+
ConsoleUtils.PrintMetrics(metrics);
58+
59+
// Expected output:
60+
// Micro Accuracy: 0.90
61+
// Macro Accuracy: 0.90
62+
// Log Loss: 0.37
63+
// Log Loss Reduction: 0.67
64+
}
65+
66+
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
67+
{
68+
var random = new Random(seed);
69+
float randomFloat() => (float)random.NextDouble();
70+
for (int i = 0; i < count; i++)
71+
{
72+
// Generate Labels that are integers 1, 2 or 3
73+
var label = random.Next(1, 4);
74+
yield return new DataPoint
75+
{
76+
Label = (uint)label,
77+
// Create random features that are correlated with the label.
78+
// The feature values are slightly increased by adding a constant multiple of label.
79+
Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray()
80+
};
81+
}
82+
}
83+
84+
// Example with label and 20 feature values. A data set is a collection of such examples.
85+
private class DataPoint
86+
{
87+
public uint Label { get; set; }
88+
[VectorType(20)]
89+
public float[] Features { get; set; }
90+
}
91+
92+
// Class used to capture predictions.
93+
private class Prediction
94+
{
95+
// Original label.
96+
public uint Label { get; set; }
97+
// Predicted label from the trainer.
98+
public uint PredictedLabel { get; set; }
99+
}
100+
}
101+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<#@ include file="MulticlassClassification.ttinclude"#>
2+
<#+
3+
string ClassName="OneVersusAll";
4+
string Trainer = "mlContext.BinaryClassification.Trainers.SdcaLogisticRegression";
5+
string MetaTrainer = "OneVersusAll";
6+
string TrainerOptions = null;
7+
8+
string OptionsInclude = "";
9+
string Comments= "";
10+
11+
string ExpectedOutputPerInstance= @"// Expected output:
12+
// Label: 1, Prediction: 1
13+
// Label: 2, Prediction: 2
14+
// Label: 3, Prediction: 2
15+
// Label: 2, Prediction: 2
16+
// Label: 3, Prediction: 2";
17+
18+
string ExpectedOutput = @"// Expected output:
19+
// Micro Accuracy: 0.90
20+
// Macro Accuracy: 0.90
21+
// Log Loss: 0.37
22+
// Log Loss Reduction: 0.67";
23+
#>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.SamplesUtils;
7+
8+
namespace Samples.Dynamic.Trainers.MulticlassClassification
9+
{
10+
public static class PairwiseCoupling
11+
{
12+
public static void Example()
13+
{
14+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
15+
// as a catalog of available operations and as the source of randomness.
16+
// Setting the seed to a fixed number in this example to make outputs deterministic.
17+
var mlContext = new MLContext(seed: 0);
18+
19+
// Create a list of training data points.
20+
var dataPoints = GenerateRandomDataPoints(1000);
21+
22+
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
23+
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
24+
25+
// Define the trainer.
26+
var pipeline =
27+
// Convert the numeric labels into key types.
28+
mlContext.Transforms.Conversion.MapValueToKey("Label")
29+
// Apply PairwiseCoupling multiclass meta trainer on top of binary trainer.
30+
.Append(mlContext.MulticlassClassification.Trainers.PairwiseCoupling(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression()));
31+
32+
// Train the model.
33+
var model = pipeline.Fit(trainingData);
34+
35+
// Create testing data. Use different random seed to make it different from training data.
36+
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
37+
38+
// Run the model on test data set.
39+
var transformedTestData = model.Transform(testData);
40+
41+
// Convert IDataView object to a list.
42+
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
43+
44+
// Look at 5 predictions
45+
foreach (var p in predictions.Take(5))
46+
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
47+
48+
// Expected output:
49+
// Label: 1, Prediction: 1
50+
// Label: 2, Prediction: 2
51+
// Label: 3, Prediction: 2
52+
// Label: 2, Prediction: 2
53+
// Label: 3, Prediction: 2
54+
55+
// Evaluate the overall metrics
56+
var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData);
57+
ConsoleUtils.PrintMetrics(metrics);
58+
59+
// Expected output:
60+
// Micro Accuracy: 0.90
61+
// Macro Accuracy: 0.90
62+
// Log Loss: 0.37
63+
// Log Loss Reduction: 0.67
64+
}
65+
66+
/// <summary>
/// Lazily generates synthetic data points, each carrying an integer label of 1, 2 or 3
/// and 20 random feature values that are shifted upwards in proportion to the label.
/// </summary>
/// <param name="count">Number of data points to yield.</param>
/// <param name="seed">Seed passed to <see cref="Random"/>; a fixed seed keeps the sample output reproducible.</param>
/// <returns>A deferred sequence of <c>count</c> data points.</returns>
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed=0)
{
    var random = new Random(seed);
    float randomFloat() => (float)random.NextDouble();
    for (int i = 0; i < count; i++)
    {
        // Generate Labels that are integers 1, 2 or 3
        var label = random.Next(1, 4);
        yield return new DataPoint
        {
            Label = (uint)label,
            // Create random features that are correlated with the label.
            // The feature values are slightly increased by adding a constant multiple of label.
            // The 0.2f multiplier must stay in sync with MulticlassClassification.ttinclude,
            // from which this file is generated; the expected output documented in Example()
            // was produced with that value.
            Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray()
        };
    }
}
83+
84+
// Example with label and 20 feature values. A data set is a collection of such examples.
85+
private class DataPoint
86+
{
87+
public uint Label { get; set; }
88+
[VectorType(20)]
89+
public float[] Features { get; set; }
90+
}
91+
92+
// Class used to capture predictions.
93+
private class Prediction
94+
{
95+
// Original label.
96+
public uint Label { get; set; }
97+
// Predicted label from the trainer.
98+
public uint PredictedLabel { get; set; }
99+
}
100+
}
101+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<#@ include file="MulticlassClassification.ttinclude"#>
2+
<#+
3+
string ClassName="PairwiseCoupling";
4+
string Trainer = "mlContext.BinaryClassification.Trainers.SdcaLogisticRegression";
5+
string MetaTrainer = "PairwiseCoupling";
6+
string TrainerOptions = null;
7+
8+
string OptionsInclude = "";
9+
string Comments= "";
10+
11+
string ExpectedOutputPerInstance= @"// Expected output:
12+
// Label: 1, Prediction: 1
13+
// Label: 2, Prediction: 2
14+
// Label: 3, Prediction: 2
15+
// Label: 2, Prediction: 2
16+
// Label: 3, Prediction: 2";
17+
18+
string ExpectedOutput = @"// Expected output:
19+
// Micro Accuracy: 0.90
20+
// Macro Accuracy: 0.90
21+
// Log Loss: 0.37
22+
// Log Loss Reduction: 0.67";
23+
#>

docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj

+18
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@
9090
<Generator>TextTemplatingFileGenerator</Generator>
9191
<LastGenOutput>LbfgsLogisticRegression.cs</LastGenOutput>
9292
</None>
93+
<None Update="Dynamic\Trainers\MulticlassClassification\PairwiseCoupling.tt">
94+
<LastGenOutput>PairwiseCoupling.cs</LastGenOutput>
95+
<Generator>TextTemplatingFileGenerator</Generator>
96+
</None>
97+
<None Update="Dynamic\Trainers\MulticlassClassification\OneVersusAll.tt">
98+
<LastGenOutput>OneVersusAll.cs</LastGenOutput>
99+
<Generator>TextTemplatingFileGenerator</Generator>
100+
</None>
93101
<None Update="Dynamic\Trainers\Regression\OnlineGradientDescent.tt">
94102
<LastGenOutput>OnlineGradientDescent.cs</LastGenOutput>
95103
<Generator>TextTemplatingFileGenerator</Generator>
@@ -143,6 +151,16 @@
143151
<AutoGen>True</AutoGen>
144152
<DependentUpon>LbfgsLogisticRegressionWithOptions.tt</DependentUpon>
145153
</Compile>
154+
<Compile Update="Dynamic\Trainers\MulticlassClassification\PairwiseCoupling.cs">
155+
<DesignTime>True</DesignTime>
156+
<AutoGen>True</AutoGen>
157+
<DependentUpon>PairwiseCoupling.tt</DependentUpon>
158+
</Compile>
159+
<Compile Update="Dynamic\Trainers\MulticlassClassification\OneVersusAll.cs">
160+
<DesignTime>True</DesignTime>
161+
<AutoGen>True</AutoGen>
162+
<DependentUpon>OneVersusAll.tt</DependentUpon>
163+
</Compile>
146164
<Compile Update="Dynamic\Trainers\Regression\OnlineGradientDescent.cs">
147165
<DesignTime>True</DesignTime>
148166
<AutoGen>True</AutoGen>

0 commit comments

Comments
 (0)