Skip to content

Commit 86d5dda

Browse files
Ivanidzo4ka authored and TomFinley committed
Calibrators catalog introduction, rename PAV to Isotonic (#2766)
1 parent 3701389 commit 86d5dda

File tree

9 files changed

+426
-166
lines changed

9 files changed

+426
-166
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs

-113
This file was deleted.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
using System;
2+
using System.Linq;
3+
4+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators
5+
{
6+
/// <summary>
/// Sample: scores data with an averaged perceptron and then maps the raw scores to probabilities
/// using a Platt calibrator whose slope and offset are supplied explicitly.
/// </summary>
public static class FixedPlatt
7+
{
8+
/// <summary>
/// Runs the sample end to end and prints the first five rows before and after calibration.
/// </summary>
public static void Example()
9+
{
10+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+
// as a catalog of available operations and as the source of randomness.
12+
// Setting the seed to a fixed number in this example to make outputs deterministic.
13+
var mlContext = new MLContext(seed: 0);
14+
15+
// Load the featurized Adult dataset through the samples helper.
16+
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
17+
// Hold out 30% of the data for testing (testFraction: 0.3); the rest is used for training.
18+
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
19+
20+
// Create the training pipeline: an averaged perceptron trainer. Its scored output (previewed below) has a
// Score column but no Probability column; the calibrator is applied as a separate step further down.
21+
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
22+
23+
// Fit the pipeline, and get a transformer that knows how to score new data.
24+
var transformer = pipeline.Fit(trainTestData.TrainSet);
25+
// The call above fits the trainer on the training split; the steps below only score and calibrate.
26+
// Score the held-out test set. The Score column is a raw model output, not a probability: the values below
27+
// fall outside [0, 1]. In the sample output, larger scores end up with larger calibrated probabilities.
// NOTE(review): earlier wording spoke of "positive sentiment"; the data comes from LoadFeaturizedAdultDataset,
// so the meaning of the positive label should be confirmed.
28+
var scoredData = transformer.Transform(trainTestData.TestSet);
29+
var scoredDataPreview = scoredData.Preview();
30+
31+
PrintRowViewValues(scoredDataPreview);
32+
// Preview of scoredDataPreview.RowView
33+
// Score 4.18144
34+
// Score -14.10248
35+
// Score 2.731951
36+
// Score -2.554229
37+
// Score 5.36571
38+
39+
// Create a Platt calibrator estimator with an explicitly supplied slope and offset. Fitting it produces a transformer
40+
// that can transform the scored data by adding a new column named "Probability".
// The sample output below is consistent with Probability = 1 / (1 + exp(slope * Score + offset)).
// NOTE(review): because slope and offset are passed in, Fit presumably has nothing to learn here - confirm.
41+
var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt(slope: -1f, offset: -0.05f);
42+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
43+
44+
// Transform the scored data with the calibrator transformer by adding a new column named "Probability".
45+
// This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
46+
// representing the estimated chance that the respective sample belongs to the positive class.
47+
var finalData = calibratorTransformer.Transform(scoredData).Preview();
48+
PrintRowViewValues(finalData);
49+
// Score 4.18144 Probability 0.9856767
50+
// Score -14.10248 Probability 7.890148E-07
51+
// Score 2.731951 Probability 0.9416927
52+
// Score -2.554229 Probability 0.07556222
53+
// Score 5.36571 Probability 0.9955735
54+
}
55+
56+
/// <summary>
/// Writes the "Score" and "Probability" values (for whichever of those columns are present) of the first
/// five preview rows to the console, one row per line.
/// </summary>
/// <param name="data">Preview whose RowView supplies the rows to print.</param>
private static void PrintRowViewValues(Data.DataDebuggerPreview data)
57+
{
58+
// Only the first five rows are printed, matching the sample output shown in Example().
var firstRows = data.RowView.Take(5);
59+
60+
foreach (Data.DataDebuggerPreview.RowInfo row in firstRows)
61+
{
62+
foreach (var kvPair in row.Values)
63+
{
64+
// Skip every column except the two this sample is about.
if (kvPair.Key.Equals("Score") || kvPair.Key.Equals("Probability"))
65+
Console.Write($" {kvPair.Key} {kvPair.Value} ");
66+
}
67+
// End the console line for this row.
Console.WriteLine();
68+
}
69+
}
70+
}
71+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
using System;
2+
using System.Linq;
3+
4+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators
5+
{
6+
/// <summary>
/// Sample: scores data with an averaged perceptron and then fits an Isotonic calibrator on the scored data
/// to add a "Probability" column.
/// </summary>
public static class Isotonic
7+
{
8+
/// <summary>
/// Runs the sample end to end and prints the first five rows before and after calibration.
/// </summary>
public static void Example()
9+
{
10+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+
// as a catalog of available operations and as the source of randomness.
12+
// Setting the seed to a fixed number in this example to make outputs deterministic.
13+
var mlContext = new MLContext(seed: 0);
14+
15+
// Load the featurized Adult dataset through the samples helper.
16+
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
17+
// Hold out 30% of the data for testing (testFraction: 0.3); the rest is used for training.
18+
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
19+
20+
// Create the training pipeline: an averaged perceptron trainer. Its scored output (previewed below) has a
// Score column but no Probability column; the Isotonic calibrator is fitted on those scores further down.
21+
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
22+
23+
// Fit the pipeline, and get a transformer that knows how to score new data.
24+
var transformer = pipeline.Fit(trainTestData.TrainSet);
25+
// The call above fits the trainer on the training split; the steps below only score and calibrate.
26+
// Score the held-out test set. The Score column is a raw model output, not a probability: the values below
27+
// fall outside [0, 1]. In the sample output, larger scores end up with larger calibrated probabilities.
// NOTE(review): earlier wording spoke of "positive sentiment"; the data comes from LoadFeaturizedAdultDataset,
// so the meaning of the positive label should be confirmed.
28+
var scoredData = transformer.Transform(trainTestData.TestSet);
29+
var scoredDataPreview = scoredData.Preview();
30+
31+
PrintRowViewValues(scoredDataPreview);
32+
// Preview of scoredDataPreview.RowView
33+
// Score 4.18144
34+
// Score -14.10248
35+
// Score 2.731951
36+
// Score -2.554229
37+
// Score 5.36571
38+
39+
// Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
40+
// that can transform the scored data by adding a new column named "Probability".
41+
var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Isotonic();
42+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
43+
44+
// Transform the scored data with the calibrator transformer by adding a new column named "Probability".
45+
// This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
46+
// representing the estimated chance that the respective sample belongs to the positive class.
47+
var finalData = calibratorTransformer.Transform(scoredData).Preview();
48+
PrintRowViewValues(finalData);
49+
// Score 4.18144 Probability 0.8
50+
// Score -14.10248 Probability 1E-15
51+
// Score 2.731951 Probability 0.7370371
52+
// Score -2.554229 Probability 0.2063954
53+
// Score 5.36571 Probability 0.8958333
54+
}
55+
56+
/// <summary>
/// Writes the "Score" and "Probability" values (for whichever of those columns are present) of the first
/// five preview rows to the console, one row per line.
/// </summary>
/// <param name="data">Preview whose RowView supplies the rows to print.</param>
private static void PrintRowViewValues(Data.DataDebuggerPreview data)
57+
{
58+
// Only the first five rows are printed, matching the sample output shown in Example().
var firstRows = data.RowView.Take(5);
59+
60+
foreach (Data.DataDebuggerPreview.RowInfo row in firstRows)
61+
{
62+
foreach (var kvPair in row.Values)
63+
{
64+
// Skip every column except the two this sample is about.
if (kvPair.Key.Equals("Score") || kvPair.Key.Equals("Probability"))
65+
Console.Write($" {kvPair.Key} {kvPair.Value} ");
66+
}
67+
// End the console line for this row.
Console.WriteLine();
68+
}
69+
}
70+
}
71+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
using System;
2+
using System.Linq;
3+
4+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators
5+
{
6+
/// <summary>
/// Sample: scores data with an averaged perceptron and then fits a Naive calibrator on the scored data
/// to add a "Probability" column.
/// </summary>
public static class Naive
7+
{
8+
/// <summary>
/// Runs the sample end to end and prints the first five rows before and after calibration.
/// </summary>
public static void Example()
9+
{
10+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+
// as a catalog of available operations and as the source of randomness.
12+
// Setting the seed to a fixed number in this example to make outputs deterministic.
13+
var mlContext = new MLContext(seed: 0);
14+
15+
// Load the featurized Adult dataset through the samples helper.
16+
var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
17+
// Hold out 30% of the data for testing (testFraction: 0.3); the rest is used for training.
18+
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.3);
19+
20+
// Create the training pipeline: an averaged perceptron trainer. Its scored output (previewed below) has a
// Score column but no Probability column; the Naive calibrator is fitted on those scores further down.
21+
var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
22+
23+
// Fit the pipeline, and get a transformer that knows how to score new data.
24+
var transformer = pipeline.Fit(trainTestData.TrainSet);
25+
// The call above fits the trainer on the training split; the steps below only score and calibrate.
26+
// Score the held-out test set. The Score column is a raw model output, not a probability: the values below
27+
// fall outside [0, 1]. In the sample output, larger scores end up with larger calibrated probabilities.
// NOTE(review): earlier wording spoke of "positive sentiment"; the data comes from LoadFeaturizedAdultDataset,
// so the meaning of the positive label should be confirmed.
28+
var scoredData = transformer.Transform(trainTestData.TestSet);
29+
var scoredDataPreview = scoredData.Preview();
30+
31+
PrintRowViewValues(scoredDataPreview);
32+
// Preview of scoredDataPreview.RowView
33+
// Score 4.18144
34+
// Score -14.10248
35+
// Score 2.731951
36+
// Score -2.554229
37+
// Score 5.36571
38+
39+
// Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
40+
// that can transform the scored data by adding a new column named "Probability".
41+
var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Naive();
42+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
43+
44+
// Transform the scored data with the calibrator transformer by adding a new column named "Probability".
45+
// This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
46+
// representing the estimated chance that the respective sample belongs to the positive class.
47+
var finalData = calibratorTransformer.Transform(scoredData).Preview();
48+
PrintRowViewValues(finalData);
49+
// Score 4.18144 Probability 0.775
50+
// Score -14.10248 Probability 0.01923077
51+
// Score 2.731951 Probability 0.7738096
52+
// Score -2.554229 Probability 0.2011494
53+
// Score 5.36571 Probability 0.9117647
54+
}
55+
56+
/// <summary>
/// Writes the "Score" and "Probability" values (for whichever of those columns are present) of the first
/// five preview rows to the console, one row per line.
/// </summary>
/// <param name="data">Preview whose RowView supplies the rows to print.</param>
private static void PrintRowViewValues(Data.DataDebuggerPreview data)
57+
{
58+
// Only the first five rows are printed, matching the sample output shown in Example().
var firstRows = data.RowView.Take(5);
59+
60+
foreach (Data.DataDebuggerPreview.RowInfo row in firstRows)
61+
{
62+
foreach (var kvPair in row.Values)
63+
{
64+
// Skip every column except the two this sample is about.
if (kvPair.Key.Equals("Score") || kvPair.Key.Equals("Probability"))
65+
Console.Write($" {kvPair.Key} {kvPair.Value} ");
66+
}
67+
// End the console line for this row.
Console.WriteLine();
68+
}
69+
}
70+
}
71+
}

0 commit comments

Comments
 (0)