Skip to content

Commit 6f576de

Browse files
authored
Update Feature Contribution Calculation Samples (#3241)
* Updating samples for FCC
1 parent a4fb9a8 commit 6f576de

File tree

5 files changed

+242
-123
lines changed

5 files changed

+242
-123
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

-120
This file was deleted.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,115 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;

namespace Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to compute per-feature contributions to the score of a
    /// linear regression model via <c>CalculateFeatureContribution</c>.
    /// </summary>
    public static class CalculateFeatureContribution
    {
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext(seed: 1);

            // Create a small dataset.
            var samples = GenerateData();

            // Convert training data to IDataView.
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Create a pipeline to concatenate the features into a feature vector and normalize it.
            var transformPipeline = mlContext.Transforms.Concatenate("Features",
                    new string[] { nameof(Data.Feature1), nameof(Data.Feature2) })
                .Append(mlContext.Transforms.NormalizeMeanVariance("Features"));

            // Fit the pipeline.
            var transformer = transformPipeline.Fit(data);

            // Transform the data.
            var transformedData = transformer.Transform(data);

            // Define a linear trainer.
            var linearTrainer = mlContext.Regression.Trainers.Ols();

            // Now we train the model and score it on the transformed data.
            var linearModel = linearTrainer.Fit(transformedData);
            // Print the model parameters.
            Console.WriteLine("Linear Model Parameters");
            Console.WriteLine($"Bias: {linearModel.Model.Bias} Feature1: {linearModel.Model.Weights[0]} Feature2: {linearModel.Model.Weights[1]}");

            // Define a feature contribution calculator for all the features, and don't normalize the contributions.
            // These are "trivial estimators" and they don't need to fit to the data, so we can feed a subset.
            var simpleScoredDataset = linearModel.Transform(mlContext.Data.TakeRows(transformedData, 1));
            var linearFeatureContributionCalculator = mlContext.Transforms.CalculateFeatureContribution(linearModel, normalize: false).Fit(simpleScoredDataset);

            // Create a transformer chain to describe the entire pipeline.
            var scoringPipeline = transformer.Append(linearModel).Append(linearFeatureContributionCalculator);

            // Create the prediction engine to score single examples along with their feature contributions.
            var predictionEngine = mlContext.Model.CreatePredictionEngine<Data, ScoredData>(scoringPipeline);

            // Score the first example.
            var prediction = predictionEngine.Predict(samples.First());

            // Write out the prediction, with contributions.
            // Note that for the linear model, the feature contributions for a feature in an example is the feature-weight*feature-value.
            // The total prediction is thus the bias plus the feature contributions.
            Console.WriteLine($"Label: {prediction.Label} Prediction: {prediction.Score}");
            Console.WriteLine($"Feature1: {prediction.Features[0]} Feature2: {prediction.Features[1]}");
            Console.WriteLine($"Feature Contributions: {prediction.FeatureContributions[0]} {prediction.FeatureContributions[1]}");

            // Expected output:
            //  Linear Model Parameters
            //  Bias: -0.007505796 Feature1: 1.536963 Feature2: 3.031206
            //  Label: 1.55184 Prediction: 1.389091
            //  Feature1: -0.5053467 Feature2: 0.7169741
            //  Feature Contributions: -0.7766994 2.173296
        }

        // Input schema: a regression label plus two numeric features.
        private class Data
        {
            public float Label { get; set; }

            public float Feature1 { get; set; }

            public float Feature2 { get; set; }
        }

        // Output schema: adds the model score, the (normalized) feature vector,
        // and the per-feature contribution vector produced by the FCC transform.
        private class ScoredData : Data
        {
            public float Score { get; set; }
            public float[] Features { get; set; }
            public float[] FeatureContributions { get; set; }
        }

        /// <summary>
        /// Generate an enumerable of Data objects, creating the label as a simple
        /// linear combination of the features.
        /// </summary>
        /// <param name="nExamples">The number of examples.</param>
        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
        /// <param name="seed">The seed for generating feature values and label noise.</param>
        /// <returns>An enumerable of Data objects.</returns>
        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
        {
            var rng = new Random(seed);
            for (int i = 0; i < nExamples; i++)
            {
                var data = new Data
                {
                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                };

                // Create a noisy label.
                data.Label = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5);
                yield return data;
            }
        }
    }
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,124 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;

namespace Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to compute per-feature contributions to the score of a
    /// calibrated binary classifier via <c>CalculateFeatureContribution</c>.
    /// </summary>
    public static class CalculateFeatureContributionCalibrated
    {
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            var mlContext = new MLContext();

            // Create a small dataset.
            var samples = GenerateData();

            // Convert training data to IDataView.
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Create a pipeline to concatenate the features into a feature vector and normalize it.
            var transformPipeline = mlContext.Transforms.Concatenate("Features",
                    new string[] { nameof(Data.Feature1), nameof(Data.Feature2) })
                .Append(mlContext.Transforms.NormalizeMeanVariance("Features"));

            // Fit the pipeline.
            var transformer = transformPipeline.Fit(data);

            // Transform the data.
            var transformedData = transformer.Transform(data);

            // Define a linear trainer.
            var linearTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression();

            // Now we train the model and score it on the transformed data.
            var linearModel = linearTrainer.Fit(transformedData);
            // Print the model parameters. The calibrated model wraps the linear
            // model, so the weights live on the SubModel.
            Console.WriteLine("Linear Model Parameters");
            Console.WriteLine("Bias: {0} Feature1: {1} Feature2: {2}",
                linearModel.Model.SubModel.Bias,
                linearModel.Model.SubModel.Weights[0],
                linearModel.Model.SubModel.Weights[1]);

            // Define a feature contribution calculator for all the features, and don't normalize the contributions.
            // These are "trivial estimators" and they don't need to fit to the data, so we can feed a subset.
            var simpleScoredDataset = linearModel.Transform(mlContext.Data.TakeRows(transformedData, 1));
            var linearFeatureContributionCalculator = mlContext.Transforms.CalculateFeatureContribution(linearModel, normalize: false).Fit(simpleScoredDataset);

            // Create a transformer chain to describe the entire pipeline.
            var scoringPipeline = transformer.Append(linearModel).Append(linearFeatureContributionCalculator);

            // Create the prediction engine to score single examples along with their feature contributions.
            var predictionEngine = mlContext.Model.CreatePredictionEngine<Data, ScoredData>(scoringPipeline);

            // Score the first example.
            var prediction = predictionEngine.Predict(samples.First());

            // Write out the prediction, with contributions.
            // Note that for the linear model, the feature contributions for a feature in an example is the feature-weight*feature-value.
            // The total prediction is thus the bias plus the feature contributions.
            Console.WriteLine($"Label: {prediction.Label} Prediction-Score: {prediction.Score} Prediction-Probability: {prediction.Probability}");
            Console.WriteLine($"Feature1: {prediction.Features[0]} Feature2: {prediction.Features[1]}");
            Console.WriteLine($"Feature Contributions: {prediction.FeatureContributions[0]} {prediction.FeatureContributions[1]}");

            // Expected output:
            //  Linear Model Parameters
            //  Bias: 0.003757346 Feature1: 9.070082 Feature2: 17.7816
            //  Label: True Prediction-Score: 8.169167 Prediction-Probability: 0.9997168
            //  Feature1: -0.5053467 Feature2: 0.7169741
            //  Feature Contributions: -4.583536 12.74894
        }

        // Input schema: a Boolean label plus two numeric features.
        private class Data
        {
            public bool Label { get; set; }

            public float Feature1 { get; set; }

            public float Feature2 { get; set; }
        }

        // Output schema: adds the raw score, the calibrated probability, the
        // (normalized) feature vector, and the per-feature contribution vector.
        private class ScoredData : Data
        {
            public float Score { get; set; }

            public float Probability { get; set; }

            public float[] Features { get; set; }

            public float[] FeatureContributions { get; set; }
        }

        /// <summary>
        /// Generate an enumerable of Data objects, creating the label as a simple
        /// linear combination of the features.
        /// </summary>
        /// <param name="nExamples">The number of examples.</param>
        /// <param name="bias">The bias, or offset, in the calculation of the label.</param>
        /// <param name="weight1">The weight to multiply the first feature with to compute the label.</param>
        /// <param name="weight2">The weight to multiply the second feature with to compute the label.</param>
        /// <param name="seed">The seed for generating feature values and label noise.</param>
        /// <returns>An enumerable of Data objects.</returns>
        private static IEnumerable<Data> GenerateData(int nExamples = 10000,
            double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
        {
            var rng = new Random(seed);
            for (int i = 0; i < nExamples; i++)
            {
                var data = new Data
                {
                    Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                    Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                };

                // Create a Boolean label with noise.
                var value = bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5;
                data.Label = Sigmoid(value) > 0.5;
                yield return data;
            }
        }

        // Standard logistic function, used to squash the noisy linear score into (0, 1).
        private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x));
    }
}

docs/samples/Microsoft.ML.Samples/Program.cs

+1-1
Original file line number | Diff line number | Diff line change
// Reconstructed post-commit state of the diff hunk: the sample entry point now
// runs the updated feature-contribution sample instead of ReplaceMissingValues.
internal static class Program
{
    static void Main(string[] args)
    {
        CalculateFeatureContribution.Example();
    }
}

src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public static class ExplainabilityCatalog
2828
/// <example>
2929
/// <format type="text/markdown">
3030
/// <![CDATA[
31-
/// [!code-csharp[FCT](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs)]
31+
/// [!code-csharp[CalculateFeatureContribution](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContribution.cs)]
3232
/// ]]>
3333
/// </format>
3434
/// </example>
@@ -54,7 +54,7 @@ public static FeatureContributionCalculatingEstimator CalculateFeatureContributi
5454
/// <example>
5555
/// <format type="text/markdown">
5656
/// <![CDATA[
57-
/// [!code-csharp[FCT](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs)]
57+
/// [!code-csharp[CalculateFeatureContributionCalibrated](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CalculateFeatureContributionCalibrated.cs)]
5858
/// ]]>
5959
/// </format>
6060
/// </example>

0 commit comments

Comments
 (0)