Static pipeline column indexers, binary/regression evaluators #869
Changes from all commits
@@ -5,6 +5,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data.StaticPipe;
using Microsoft.ML.Data.StaticPipe.Runtime;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
@@ -410,66 +412,58 @@ public sealed class Counters

public Double Acc
{
    get
    {
    get {
        return (NumTrueNeg + NumTruePos) / (NumTruePos + NumTrueNeg + NumFalseNeg + NumFalsePos);
    }
}

public Double RecallPos
{
    get
    {
    get {
        return (NumTruePos + NumFalseNeg > 0) ? NumTruePos / (NumTruePos + NumFalseNeg) : 0;
    }
}

public Double PrecisionPos
{
    get
    {
    get {
        return (NumTruePos + NumFalsePos > 0) ? NumTruePos / (NumTruePos + NumFalsePos) : 0;
    }
}

public Double RecallNeg
{
    get
    {
    get {
        return (NumTrueNeg + NumFalsePos > 0) ? NumTrueNeg / (NumTrueNeg + NumFalsePos) : 0;
    }
}

public Double PrecisionNeg
{
    get
    {
    get {
        return (NumTrueNeg + NumFalseNeg > 0) ? NumTrueNeg / (NumTrueNeg + NumFalseNeg) : 0;
    }
}

public Double Entropy
{
    get
    {
    get {
        return MathUtils.Entropy((NumTruePos + NumFalseNeg) /
            (NumTruePos + NumTrueNeg + NumFalseNeg + NumFalsePos));
    }
}

public Double LogLoss
{
    get
    {
    get {
        return Double.IsNaN(_logLoss) ? Double.NaN : (_numLogLossPositives + _numLogLossNegatives > 0)
            ? _logLoss / (_numLogLossPositives + _numLogLossNegatives) : 0;
    }
}

public Double LogLossReduction
{
    get
    {
    get {
        if (_numLogLossPositives + _numLogLossNegatives == 0)
            return 0;
        var logLoss = _logLoss / (_numLogLossPositives + _numLogLossNegatives);
@@ -787,6 +781,246 @@ private void ComputePrCurves()
            }
        }
    }

    /// <summary>
    /// Evaluation results for binary classifiers, excluding probabilistic metrics.
    /// </summary>
    public class Result
    {
        /// <summary>
        /// Gets the area under the ROC curve.
        /// </summary>
        /// <remarks>
        /// The area under the ROC curve is equal to the probability that the classifier ranks
        /// a randomly chosen positive instance higher than a randomly chosen negative one
        /// (assuming 'positive' ranks higher than 'negative').
        /// </remarks>
        public double Auc { get; }

        /// <summary>
        /// Gets the accuracy of a classifier, which is the proportion of correct predictions in the test set.
        /// </summary>
        public double Accuracy { get; }

        /// <summary>
        /// Gets the positive precision of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive predictions (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of instances predicted as positive).
        /// </summary>
        public double PositivePrecision { get; }

        /// <summary>
        /// Gets the positive recall of a classifier, which is the proportion of correctly predicted
        /// positive instances among all the positive instances (i.e., the number of positive instances
        /// predicted as positive, divided by the total number of positive instances).
        /// </summary>
        public double PositiveRecall { get; private set; }

        /// <summary>
        /// Gets the negative precision of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative predictions (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of instances predicted as negative).
        /// </summary>
        public double NegativePrecision { get; }

        /// <summary>
        /// Gets the negative recall of a classifier, which is the proportion of correctly predicted
        /// negative instances among all the negative instances (i.e., the number of negative instances
        /// predicted as negative, divided by the total number of negative instances).
        /// </summary>
        public double NegativeRecall { get; }

        /// <summary>
        /// Gets the F1 score of the classifier.
        /// </summary>
        /// <remarks>
        /// F1 score is the harmonic mean of precision and recall: 2 * precision * recall / (precision + recall).
        /// </remarks>
        public double F1Score { get; }

        /// <summary>
        /// Gets the area under the precision/recall curve of the classifier.
        /// </summary>
        /// <remarks>
        /// The area under the precision/recall curve is a single number summary of the information in the
        /// precision/recall curve. It is increasingly used in the machine learning community, particularly
        /// for imbalanced datasets where one class is observed more frequently than the other. On these
        /// datasets, AUPRC can highlight performance differences that are lost with AUC.
        /// </remarks>
Reviewer comment: BTW, really like the explanation of the metric. #Resolved

Author reply: I actually just copy pasted it from the soon-to-be-deprecated metrics structure. ;) I'm not sure whom you should thank then. @zeahmed maybe? Someone else? #Closed
        public double Auprc { get; }

        protected private static T Fetch<T>(IExceptionContext ectx, IRow row, string name)
        {
            if (!row.Schema.TryGetColumnIndex(name, out int col))
                throw ectx.Except($"Could not find column '{name}'");
            T val = default;
            row.GetGetter<T>(col)(ref val);
            return val;
        }

        internal Result(IExceptionContext ectx, IRow overallResult)
        {
            double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
            Auc = Fetch(BinaryClassifierEvaluator.Auc);
            Accuracy = Fetch(BinaryClassifierEvaluator.Accuracy);
            PositivePrecision = Fetch(BinaryClassifierEvaluator.PosPrecName);
            PositiveRecall = Fetch(BinaryClassifierEvaluator.PosRecallName);
            NegativePrecision = Fetch(BinaryClassifierEvaluator.NegPrecName);
            NegativeRecall = Fetch(BinaryClassifierEvaluator.NegRecallName);
            F1Score = Fetch(BinaryClassifierEvaluator.F1);
            Auprc = Fetch(BinaryClassifierEvaluator.AuPrc);
        }
    }

    /// <summary>
    /// Evaluation results for binary classifiers, including probabilistic metrics.
    /// </summary>
    public sealed class CalibratedResult : Result
    {
        /// <summary>
        /// Gets the log-loss of the classifier.
        /// </summary>
        /// <remarks>
        /// The log-loss metric is computed as follows:
        /// LL = - (1/m) * sum( log(p[i]))
        /// where m is the number of instances in the test set.
        /// p[i] is the probability returned by the classifier if the instance belongs to class 1,
        /// and 1 minus the probability returned by the classifier if the instance belongs to class 0.
        /// </remarks>
        public double LogLoss { get; }

        /// <summary>
        /// Gets the log-loss reduction (also known as relative log-loss, or reduction in information gain - RIG)
        /// of the classifier.
        /// </summary>
        /// <remarks>
        /// The log-loss reduction is scaled relative to a classifier that predicts the prior for every example:
        /// (LL(prior) - LL(classifier)) / LL(prior)
        /// This metric can be interpreted as the advantage of the classifier over a random prediction.
        /// E.g., if the RIG equals 20, it can be interpreted as "the probability of a correct prediction is
        /// 20% better than random guessing."
        /// </remarks>
        public double LogLossReduction { get; }

        /// <summary>
        /// Gets the test-set entropy (prior Log-Loss/instance) of the classifier.
        /// </summary>
        public double Entropy { get; }

        internal CalibratedResult(IExceptionContext ectx, IRow overallResult)
            : base(ectx, overallResult)
        {
            double Fetch(string name) => Fetch<double>(ectx, overallResult, name);
            LogLoss = Fetch(BinaryClassifierEvaluator.LogLoss);
            LogLossReduction = Fetch(BinaryClassifierEvaluator.LogLossReduction);
            Entropy = Fetch(BinaryClassifierEvaluator.Entropy);
        }
    }

    /// <summary>
    /// Evaluates scored binary classification data.
    /// </summary>
    /// <typeparam name="T">The shape type for the input data.</typeparam>
    /// <param name="data">The data to evaluate.</param>
    /// <param name="label">The index delegate for the label column.</param>
    /// <param name="pred">The index delegate for columns from calibrated prediction of a binary classifier.
    /// Under typical scenarios, this will just be the same tuple of results returned from the trainer.</param>
    /// <returns>The evaluation results for these calibrated outputs.</returns>
    public static CalibratedResult Evaluate<T>(
        DataView<T> data,
        Func<T, Scalar<bool>> label,
        Func<T, (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel)> pred)
    {
        Contracts.CheckValue(data, nameof(data));
        var env = StaticPipeUtils.GetEnvironment(data);
        Contracts.AssertValue(env);
        env.CheckValue(label, nameof(label));
        env.CheckValue(pred, nameof(pred));

        var indexer = StaticPipeUtils.GetIndexer(data);
        string labelName = indexer.Get(label(indexer.Indices));
        (var scoreCol, var probCol, var predCol) = pred(indexer.Indices);
        Contracts.CheckParam(scoreCol != null, nameof(pred), "Indexing delegate resulted in null score column.");
        Contracts.CheckParam(probCol != null, nameof(pred), "Indexing delegate resulted in null probability column.");
        Contracts.CheckParam(predCol != null, nameof(pred), "Indexing delegate resulted in null predicted label column.");
        string scoreName = indexer.Get(scoreCol);
        string probName = indexer.Get(probCol);
        string predName = indexer.Get(predCol);

        var eval = new BinaryClassifierEvaluator(env, new Arguments() { });

        var roles = new RoleMappedData(data.AsDynamic, opt: false,
            RoleMappedSchema.ColumnRole.Label.Bind(labelName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, scoreName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Probability, probName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.PredictedLabel, predName));

        var resultDict = eval.Evaluate(roles);
        env.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics));
        var overall = resultDict[MetricKinds.OverallMetrics];

        CalibratedResult result;
        using (var cursor = overall.GetRowCursor(i => true))
        {
            var moved = cursor.MoveNext();
            env.Assert(moved);
            result = new CalibratedResult(env, cursor);
            moved = cursor.MoveNext();
            env.Assert(!moved);
        }
        return result;
    }

    /// <summary>
    /// Evaluates scored binary classification data.
    /// </summary>
    /// <typeparam name="T">The shape type for the input data.</typeparam>
    /// <param name="data">The data to evaluate.</param>
    /// <param name="label">The index delegate for the label column.</param>
    /// <param name="pred">The index delegate for columns from calibrated prediction of a binary classifier.
    /// Under typical scenarios, this will just be the same tuple of results returned from the trainer.</param>
Reviewer comment: When I read this, I can't help thinking: under what scenarios this won't be the case. #Resolved

Author reply: Well, you could imagine someone composing their own dataset, not training anything or doing anything but loading a file. But I didn't feel the need to call this out. In reply to: 216473441 [](ancestors = 216473441)
    /// <returns>The evaluation results for these uncalibrated outputs.</returns>
    public static Result Evaluate<T>(
        DataView<T> data,
        Func<T, Scalar<bool>> label,
        Func<T, (Scalar<float> score, Scalar<bool> predictedLabel)> pred)
    {
        Contracts.CheckValue(data, nameof(data));
        var env = StaticPipeUtils.GetEnvironment(data);
        Contracts.AssertValue(env);
        env.CheckValue(label, nameof(label));
        env.CheckValue(pred, nameof(pred));

        var indexer = StaticPipeUtils.GetIndexer(data);
        string labelName = indexer.Get(label(indexer.Indices));
        (var scoreCol, var predCol) = pred(indexer.Indices);
        Contracts.CheckParam(scoreCol != null, nameof(pred), "Indexing delegate resulted in null score column.");
        Contracts.CheckParam(predCol != null, nameof(pred), "Indexing delegate resulted in null predicted label column.");
        string scoreName = indexer.Get(scoreCol);
        string predName = indexer.Get(predCol);

        var eval = new BinaryClassifierEvaluator(env, new Arguments() { });

        var roles = new RoleMappedData(data.AsDynamic, opt: false,
            RoleMappedSchema.ColumnRole.Label.Bind(labelName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, scoreName),
            RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.PredictedLabel, predName));

        var resultDict = eval.Evaluate(roles);
        env.Assert(resultDict.ContainsKey(MetricKinds.OverallMetrics));
        var overall = resultDict[MetricKinds.OverallMetrics];

        Result result;
        using (var cursor = overall.GetRowCursor(i => true))
        {
            var moved = cursor.MoveNext();
            env.Assert(moved);
            result = new Result(env, cursor);
            moved = cursor.MoveNext();
            env.Assert(!moved);
        }
        return result;
    }
}

public sealed class BinaryPerInstanceEvaluator : PerInstanceEvaluatorBase

@@ -1526,4 +1760,4 @@ private static IDataView ExtractConfusionMatrix(IHost host, Dictionary<string, I
            return confusionMatrix;
        }
    }
}
}
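
For orientation, here is a minimal usage sketch of the static Evaluate entry points added in this diff. The scoredData variable and the column names on the shape type (label, score, probability, predictedLabel) are hypothetical placeholders, and it assumes these static methods are exposed on BinaryClassifierEvaluator; treat it as an illustration of the intended call pattern under those assumptions, not code taken from the PR.

    // Hypothetical sketch (not from this PR): evaluating a scored static-pipeline data view.
    // Assumes scoredData is a DataView<T> whose shape type T exposes the columns referenced below,
    // e.g. as produced by a calibrated binary classification trainer.
    var metrics = BinaryClassifierEvaluator.Evaluate(
        scoredData,
        r => r.label,                                       // Scalar<bool>: ground-truth label
        r => (r.score, r.probability, r.predictedLabel));   // calibrated outputs from the trainer

    Console.WriteLine($"AUC: {metrics.Auc:0.###}, Accuracy: {metrics.Accuracy:0.###}, F1: {metrics.F1Score:0.###}");
    // Probabilistic metrics are only available on CalibratedResult.
    Console.WriteLine($"Log-loss: {metrics.LogLoss:0.###}, reduction vs. prior: {metrics.LogLossReduction:0.###}");

    // Uncalibrated variant: no probability column, so only the base Result metrics are returned.
    Result uncalibrated = BinaryClassifierEvaluator.Evaluate(
        scoredData,
        r => r.label,
        r => (r.score, r.predictedLabel));

The two overloads are distinguished by the arity of the tuple the pred delegate returns, so the calibrated and uncalibrated cases resolve without any extra flags.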
Reviewer comment: Not necessary on this PR, but maybe we can provide links to the longer explanation of the metrics (Wikipedia or the MSFT doc on RML, maybe?), since those comments will be the user-facing documentation about the metrics (I assume). #Pending
Author reply: I agree this might be helpful. Maybe we can review this later; frankly, I don't quite know how to insert links in XML docs, since AFAIK <a> is not a supported tag. In reply to: 216470620 [](ancestors = 216470620)
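
On the linking question, one hedged possibility (not verified against the doc build used by this repo): some documentation toolchains, such as later Visual Studio versions and DocFX, render a <see> element with an href attribute as a hyperlink. Whether it works depends entirely on the tooling, so the snippet below is a sketch of the idea rather than a recommendation, using the Auprc property as the example.

    /// <remarks>
    /// The area under the precision/recall curve is a single number summary of the information in the
    /// precision/recall curve.
    /// For a longer discussion, see e.g.
    /// <see href="https://en.wikipedia.org/wiki/Precision_and_recall"/>.
    /// (Hypothetical: whether an href-based <see> renders as a link depends on the documentation toolchain.)
    /// </remarks>
    public double Auprc { get; }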