Static pipeline column indexers, binary/regression evaluators #869
Changes from all commits
@@ -5,6 +5,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data.StaticPipe;
using Microsoft.ML.Data.StaticPipe.Runtime;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
@@ -410,66 +412,58 @@ public sealed class Counters

public Double Acc
{
    get
    {
    get {
        return (NumTrueNeg + NumTruePos) / (NumTruePos + NumTrueNeg + NumFalseNeg + NumFalsePos);
    }
}

public Double RecallPos
{
    get
    {
    get {
        return (NumTruePos + NumFalseNeg > 0) ? NumTruePos / (NumTruePos + NumFalseNeg) : 0;
    }
}

public Double PrecisionPos
{
    get
    {
    get {
        return (NumTruePos + NumFalsePos > 0) ? NumTruePos / (NumTruePos + NumFalsePos) : 0;
    }
}

public Double RecallNeg
{
    get
    {
    get {
        return (NumTrueNeg + NumFalsePos > 0) ? NumTrueNeg / (NumTrueNeg + NumFalsePos) : 0;
    }
}

public Double PrecisionNeg
{
    get
    {
    get {
        return (NumTrueNeg + NumFalseNeg > 0) ? NumTrueNeg / (NumTrueNeg + NumFalseNeg) : 0;
    }
}

public Double Entropy
{
    get
    {
    get {
        return MathUtils.Entropy((NumTruePos + NumFalseNeg) /
            (NumTruePos + NumTrueNeg + NumFalseNeg + NumFalsePos));
    }
}

public Double LogLoss
{
    get
    {
    get {
        return Double.IsNaN(_logLoss) ? Double.NaN : (_numLogLossPositives + _numLogLossNegatives > 0)
            ? _logLoss / (_numLogLossPositives + _numLogLossNegatives) : 0;
    }
}

public Double LogLossReduction
{
    get
    {
    get {
        if (_numLogLossPositives + _numLogLossNegatives == 0)
            return 0;
        var logLoss = _logLoss / (_numLogLossPositives + _numLogLossNegatives);
@@ -787,6 +781,246 @@ private void ComputePrCurves()
            }
        }
    }

    /// <summary>
    /// Evaluation results for binary classifiers, excluding probabilistic metrics.
    /// </summary>
    public class Result
    {
        /// <summary>
        /// Gets the area under the ROC curve.
        /// </summary>
        /// <remarks>
        /// The area under the ROC curve is equal to the probability that the classifier ranks
        /// a randomly chosen positive instance higher than a randomly chosen negative one
        /// (assuming 'positive' ranks higher than 'negative').
        /// </remarks>
        public double Auc { get; }

        /// <summary>
        /// Gets the accuracy of a classifier, which is the proportion of correct predictions in the test set.
        /// </summary>
        public double Accuracy { get; }

        /// <summary>
        /// Gets the positive precision of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive predictions (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of instances predicted as positive).
        /// </summary>
        public double PositivePrecision { get; }

        /// <summary>
        /// Gets the positive recall of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive instances (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of positive instances).
        /// </summary>
        public double PositiveRecall { get; private set; }

        /// <summary>
        /// Gets the negative precision of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative predictions (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of instances predicted as negative).
        /// </summary>
        public double NegativePrecision { get; }

        /// <summary>
        /// Gets the negative recall of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative instances (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of negative instances).
        /// </summary>
        public double NegativeRecall { get; }

        /// <summary>
        /// Gets the F1 score of the classifier.
        /// </summary>
        /// <remarks>
        /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall).
        /// </remarks>
        public double F1Score { get; }

        /// <summary>
        /// Gets the area under the precision/recall curve of the classifier.
        /// </summary>
        /// <remarks>
        /// The area under the precision/recall curve is a single number summary of the information in the
        /// precision/recall curve. It is increasingly used in the machine learning community, particularly
        /// for imbalanced datasets where one class is observed more frequently than the other. On these
        /// datasets, AUPRC can highlight performance differences that are lost with AUC.
        /// </remarks>
Reviewer comment: BTW, really like the explanation of the metric. #Resolved

Author reply: I actually just copy pasted it from the soon-to-be-deprecated metrics structure. ;) I'm not sure whom you should thank then. @zeahmed maybe? Someone else? #Closed
        public double Auprc { get; }

        protected private static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
        {
            if (!row.Schema.TryGetColumnIndex(name, out int col))
                throw ectx.Except($"Could not find column '{name}'");
            T val = default;
            row.GetGetter<T>(col)(ref val);
            return val;
        }

        internal Result(IExceptionContext ectx, IRow overallResult)
        {
            double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
            Auc = Fetch(BinaryClassifierEvaluator.Auc);
            Accuracy = Fetch(BinaryClassifierEvaluator.Accuracy);
            PositivePrecision = Fetch(BinaryClassifierEvaluator.PosPrecName);
            PositiveRecall = Fetch(BinaryClassifierEvaluator.PosRecallName);
            NegativePrecision = Fetch(BinaryClassifierEvaluator.NegPrecName);
            NegativeRecall = Fetch(BinaryClassifierEvaluator.NegRecallName);
            F1Score = Fetch(BinaryClassifierEvaluator.F1);
            Auprc = Fetch(BinaryClassifierEvaluator.AuPrc);
        }
    }

    /// <summary>
    /// Evaluation results for binary classifiers, including probabilistic metrics.
    /// </summary>
    public sealed class CalibratedResult : Result
    {
        /// <summary>
        /// Gets the log-loss of the classifier.
        /// </summary>
        /// <remarks>
        /// The log-loss metric is computed as follows:
        /// LL = - (1/m) * sum( log(p[i]))
        /// where m is the number of instances in the test set.
        /// p[i] is the probability returned by the classifier if the instance belongs to class 1,
        /// and 1 minus the probability returned by the classifier if the instance belongs to class 0.
        /// </remarks>
        public double LogLoss { get; }

        /// <summary>
        /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG)
        /// of the classifier.
        /// </summary>
        /// <remarks>
        /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example:
        /// (LL(prior) - LL(classifier)) / LL(prior)
        /// This metric can be interpreted as the advantage of the classifier over a random prediction.
        /// E.g., if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is
        /// 20% better than random guessing."
        /// </remarks>
        public double LogLossReduction { get; }

        /// <summary>
        /// Gets the test-set entropy (prior Log-Loss/instance) of the classifier.
        /// </summary>
        public double Entropy { get; }

        internal CalibratedResult(IExceptionContext ectx, IRow overallResult)
            : base(ectx, overallResult)
        {
            double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
            LogLoss = Fetch(BinaryClassifierEvaluator.LogLoss);
            LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction);
            Entropy = Fetch(BinaryClassifierEvaluator.Entropy);
        }
    }

    /// <summary>
    /// Evaluates scored binary classification data.
    /// </summary>
    /// <typeparam name="T">The shape type for the input data.</typeparam>
    /// <param name="data">The data to evaluate.</param>
    /// <param name="label">The index delegate for the label column.</param>
    /// <param name="pred">The index delegate for columns from calibrated prediction of a binary classifier.
    /// Under typical scenarios, this will just be the same tuple of results returned from the trainer.</param>
    /// <returns>The evaluation results for these calibrated outputs.</returns>
    public static CalibratedResult Evaluate<T>(
        DataView<T> data,
        Func<T, Scalar<bool>> label,
        Func<T, (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel)> pred)
    {
        Contracts.CheckValue(data, nameof(data));
        var env = StaticPipeUtils.GetEnvironment(data);
        Contracts.AssertValue(env);
        env.CheckValue(label, nameof(label));
        env.CheckValue(pred, nameof(pred));

        var indexer = StaticPipeUtils.GetIndexer(data);
        string labelName = indexer.Get(label(indexer.Indices));
        (var scoreCol, var probCol, var predCol) = pred(indexer.Indices);
        Contracts.CheckParam(scoreCol != null, nameof(pred), "Indexing delegate resulted in null score column.");
        Contracts.CheckParam(probCol != null, nameof(pred), "Indexing delegate resulted in null probability column.");
        Contracts.CheckParam(predCol != null, nameof(pred), "Indexing delegate resulted in null predicted label column.");
        string scoreName = indexer.Get(scoreCol);
        string probName = indexer.Get(probCol);
        string predName = indexer.Get(predCol);

        var eval = new BinaryClassifierEvaluator(env, new Arguments() { });

        var roles = new RoleMappedData(data.AsDynamic, opt: false,
            RoleMappedSchema.ColumnRole.Label.Bind(labelName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, scoreName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Probability, probName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.PredictedLabel, predName));

        var resultDict = eval.Evaluate(roles);
        env.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics));
        var overall = resultDict[MetricKinds.OverallMetrics];

        CalibratedResult result;
        using (var cursor = overall.GetRowCursor(i => true))
        {
            var moved = cursor.MoveNext();
            env.Assert(moved);
            result = new CalibratedResult(env, cursor);
            moved = cursor.MoveNext();
            env.Assert(!moved);
        }
        return result;
    }

    /// <summary>
    /// Evaluates scored binary classification data.
    /// </summary>
    /// <typeparam name="T">The shape type for the input data.</typeparam>
    /// <param name="data">The data to evaluate.</param>
    /// <param name="label">The index delegate for the label column.</param>
    /// <param name="pred">The index delegate for columns from calibrated prediction of a binary classifier.
    /// Under typical scenarios, this will just be the same tuple of results returned from the trainer.</param>
Reviewer comment: When I read this, I can't help thinking: under what scenarios this won't be the case. #Resolved

Author reply: Well, you could imagine someone composing their own dataset, not training anything or doing anything but loading a file. But I didn't feel the need to call this out. In reply to: 216473441 [](ancestors = 216473441)
    /// <returns>The evaluation results for these uncalibrated outputs.</returns>
    public static Result Evaluate<T>(
        DataView<T> data,
        Func<T, Scalar<bool>> label,
        Func<T, (Scalar<float> score, Scalar<bool> predictedLabel)> pred)
    {
        Contracts.CheckValue(data, nameof(data));
        var env = StaticPipeUtils.GetEnvironment(data);
        Contracts.AssertValue(env);
        env.CheckValue(label, nameof(label));
        env.CheckValue(pred, nameof(pred));

        var indexer = StaticPipeUtils.GetIndexer(data);
        string labelName = indexer.Get(label(indexer.Indices));
        (var scoreCol, var predCol) = pred(indexer.Indices);
        Contracts.CheckParam(scoreCol != null, nameof(pred), "Indexing delegate resulted in null score column.");
        Contracts.CheckParam(predCol != null, nameof(pred), "Indexing delegate resulted in null predicted label column.");
        string scoreName = indexer.Get(scoreCol);
        string predName = indexer.Get(predCol);

        var eval = new BinaryClassifierEvaluator(env, new Arguments() { });

        var roles = new RoleMappedData(data.AsDynamic, opt: false,
            RoleMappedSchema.ColumnRole.Label.Bind(labelName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, scoreName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.PredictedLabel, predName));

        var resultDict = eval.Evaluate(roles);
        env.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics));
        var overall = resultDict[MetricKinds.OverallMetrics];

        Result result;
        using (var cursor = overall.GetRowCursor(i => true))
        {
            var moved = cursor.MoveNext();
            env.Assert(moved);
            result = new Result(env, cursor);
            moved = cursor.MoveNext();
            env.Assert(!moved);
        }
        return result;
    }
}

public sealed class BinaryPerInstanceEvaluator : PerInstanceEvaluatorBase

@@ -1526,4 +1760,4 @@ private static IDataView ExtractConfusionMatrix(IHost host, Dictionary<string, I
            return confusionMatrix;
        }
    }
}
}
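
For orientation, here is a minimal usage sketch of the static Evaluate entry points added in this diff. The scoredData variable and the column names on the shape type (label, score, probability, predictedLabel) are hypothetical placeholders, and it assumes these static methods are exposed on BinaryClassifierEvaluator; treat it as an illustration of the intended call pattern under those assumptions, not code taken from the PR.

    // Hypothetical sketch (not from this PR): evaluating a scored static-pipeline data view.
    // Assumes scoredData is a DataView<T> whose shape type T exposes the columns referenced below,
    // e.g. as produced by a calibrated binary classification trainer.
    var metrics = BinaryClassifierEvaluator.Evaluate(
        scoredData,
        r => r.label,                                       // Scalar<bool>: ground-truth label
        r => (r.score, r.probability, r.predictedLabel));   // calibrated outputs from the trainer

    Console.WriteLine($"AUC: {metrics.Auc:0.###}, Accuracy: {metrics.Accuracy:0.###}, F1: {metrics.F1Score:0.###}");
    // Probabilistic metrics are only available on CalibratedResult.
    Console.WriteLine($"Log-loss: {metrics.LogLoss:0.###}, reduction vs. prior: {metrics.LogLossReduction:0.###}");

    // Uncalibrated variant: no probability column, so only the base Result metrics are returned.
    Result uncalibrated = BinaryClassifierEvaluator.Evaluate(
        scoredData,
        r => r.label,
        r => (r.score, r.predictedLabel));

The two overloads are distinguished by the arity of the tuple the pred delegate returns, so the calibrated and uncalibrated cases resolve without any extra flags.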
Reviewer comment: Not necessary on this PR, but maybe we can provide links to the longer explanation of the metrics (Wikipedia or the MSFT doc on RML, maybe?), since those comments will be the user-facing documentation about the metrics (I assume). #Pending
Author reply: I agree this might be helpful. Maybe we can review this later; frankly, I don't quite know how to insert links in XML docs, since AFAIK <a> is not a supported tag. In reply to: 216470620 [](ancestors = 216470620)
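
On the linking question, one hedged possibility (not verified against the doc build used by this repo): some documentation toolchains, such as later Visual Studio versions and DocFX, render a <see> element with an href attribute as a hyperlink. Whether it works depends entirely on the tooling, so the snippet below is a sketch of the idea rather than a recommendation, using the Auprc property as the example.

    /// <remarks>
    /// The area under the precision/recall curve is a single number summary of the information in the
    /// precision/recall curve.
    /// For a longer discussion, see e.g.
    /// <see href="https://en.wikipedia.org/wiki/Precision_and_recall"/>.
    /// (Hypothetical: whether an href-based <see> renders as a link depends on the documentation toolchain.)
    /// </remarks>
    public double Auprc { get; }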