From 5fbf7404aa01e967d7a1fcd73b237214c05aac8c Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Mon, 7 Sep 2020 21:51:22 -0400
Subject: [PATCH 01/28] Fix for issue 744

---
 .../SdcaMaximumEntropy.cs                     | 19 ++--
 docs/samples/Microsoft.ML.Samples/Program.cs  | 34 +++----
 .../MulticlassClassificationMetrics.cs        |  9 +-
 .../MulticlassClassificationEvaluator.cs      | 88 +++++++++++++------
 ...sticDualCoordinateAscentClassifierBench.cs | 10 ++-
 5 files changed, 106 insertions(+), 54 deletions(-)
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
index 2a83a84f3a..7d358e5e35 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
@@ -46,7 +46,7 @@ public static void Example()
             // Create testing data. Use different random seed to make it different
             // from training data.
             var testData = mlContext.Data
-                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
+                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123, greatestLabel: 4));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
@@ -57,7 +57,7 @@ public static void Example()
                 reuseRowObject: false).ToList();
 
             // Look at 5 predictions
-            foreach (var p in predictions.Take(5))
+            foreach (var p in predictions.Take(100))
                 Console.WriteLine($"Label: {p.Label}, " + 
                     $"Prediction: {p.PredictedLabel}");
 
@@ -70,7 +70,7 @@ public static void Example()
 
             // Evaluate the overall metrics
             var metrics = mlContext.MulticlassClassification
-                .Evaluate(transformedTestData);
+                .Evaluate(transformedTestData, topKPredictionCount:3);
 
             PrintMetrics(metrics);
             
@@ -93,7 +93,7 @@ public static void Example()
         // Generates random uniform doubles in [-0.5, 0.5)
         // range with labels 1, 2 or 3.
         private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
-            int seed=0)
+            int seed=0, int greatestLabel = 4)
 
         {
             var random = new Random(seed);
@@ -101,10 +101,10 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
             for (int i = 0; i < count; i++)
             {
                 // Generate Labels that are integers 1, 2 or 3
-                var label = random.Next(1, 4);
+                var label = random.Next(1, greatestLabel);
                 yield return new DataPoint
                 {
-                    Label = (uint)label,
+                    Label = label.ToString() + "@",
                     // Create random features that are correlated with the label.
                     // The feature values are slightly increased by adding a
                     // constant multiple of label.
@@ -119,7 +119,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
         // such examples.
         private class DataPoint
         {
-            public uint Label { get; set; }
+            public string Label { get; set; }
             [VectorType(20)]
             public float[] Features { get; set; }
         }
@@ -142,6 +142,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics)
             Console.WriteLine(
                 $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n");
 
+            for (int k=0; k < metrics.TopKAccuracyForAllK.Count(); k++)
+            {
+                Console.WriteLine($"Top {k} Accuracy: {metrics.TopKAccuracyForAllK[k]:F2}");
+            }
+
             Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
         }
     }
diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs
index b30e20d7cf..8f196ee860 100644
--- a/docs/samples/Microsoft.ML.Samples/Program.cs
+++ b/docs/samples/Microsoft.ML.Samples/Program.cs
@@ -1,6 +1,7 @@
 using System;
 using System.Reflection;
 using Samples.Dynamic;
+using Samples.Dynamic.Trainers.MulticlassClassification;
 
 namespace Microsoft.ML.Samples
 {
@@ -8,25 +9,28 @@ public static class Program
     {
         public static void Main(string[] args) => RunAll(args == null || args.Length == 0 ? null : args[0]);
 
+
         internal static void RunAll(string name = null)
         {
-            int samples = 0;
-            foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
-            {
-                if (name == null || name.Equals(type.Name))
-                {
-                    var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy);
+            //int samples = 0;
+            //foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
+            //{
+            //    if (name == null || name.Equals(type.Name))
+            //    {
+            //        var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy);
+
+            //        if (sample != null)
+            //        {
+            //            Console.WriteLine(type.Name);
+            //            sample.Invoke(null, null);
+            //            samples++;
+            //        }
+            //    }
+            //}
 
-                    if (sample != null)
-                    {
-                        Console.WriteLine(type.Name);
-                        sample.Invoke(null, null);
-                        samples++;
-                    }
-                }
-            }
+            SdcaMaximumEntropy.Example();
 
-            Console.WriteLine("Number of samples that ran without any exception: " + samples);
+            Console.WriteLine("Number of samples that ran without any exception: ");
         }
     }
 }
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index 05d8f050d0..367d92385b 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -4,6 +4,7 @@
 
 using System.Collections.Generic;
 using System.Collections.Immutable;
+using System.Linq;
 using Microsoft.ML.Runtime;
 
 namespace Microsoft.ML.Data
@@ -81,6 +82,11 @@ public sealed class MulticlassClassificationMetrics
         /// </summary>
         public int TopKPredictionCount { get; }
 
+        /// <summary>
+        /// Returns the top K for all K from 1 to the number of classes
+        /// </summary>
+        public IReadOnlyList<double> TopKAccuracyForAllK { get; }
+
         /// <summary>
         /// Gets the log-loss of the classifier for each class. Log-loss measures the performance of a classifier
         /// with respect to how much the predicted probabilities diverge from the true class label. Lower
@@ -114,9 +120,10 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
             MacroAccuracy = FetchDouble(MulticlassClassificationEvaluator.AccuracyMacro);
             LogLoss = FetchDouble(MulticlassClassificationEvaluator.LogLoss);
             LogLossReduction = FetchDouble(MulticlassClassificationEvaluator.LogLossReduction);
+            TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
             TopKPredictionCount = topKPredictionCount;
             if (topKPredictionCount > 0)
-                TopKAccuracy = FetchDouble(MulticlassClassificationEvaluator.TopKAccuracy);
+                TopKAccuracy = TopKAccuracyForAllK[topKPredictionCount-1];
 
             var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
             PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index e616d19a55..abac0cdef9 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -41,6 +41,7 @@ public sealed class Arguments
         public const string AccuracyMicro = "Accuracy(micro-avg)";
         public const string AccuracyMacro = "Accuracy(macro-avg)";
         public const string TopKAccuracy = "Top K accuracy";
+        public const string AllTopKAccuracy = "Top K accuracy(All K)";
         public const string PerClassLogLoss = "Per class log-loss";
         public const string LogLoss = "Log-loss";
         public const string LogLossReduction = "Log-loss reduction";
@@ -60,7 +61,6 @@ public enum Metrics
         internal const string LoadName = "MultiClassClassifierEvaluator";
 
         private readonly int? _outputTopKAcc;
-        private readonly bool _names;
 
         public MulticlassClassificationEvaluator(IHostEnvironment env, Arguments args)
             : base(env, LoadName)
@@ -68,7 +68,6 @@ public MulticlassClassificationEvaluator(IHostEnvironment env, Arguments args)
             Host.AssertValue(args, "args");
             Host.CheckUserArg(args.OutputTopKAcc == null || args.OutputTopKAcc > 0, nameof(args.OutputTopKAcc));
             _outputTopKAcc = args.OutputTopKAcc;
-            _names = args.Names;
         }
 
         private protected override void CheckScoreAndLabelTypes(RoleMappedSchema schema)
@@ -147,6 +146,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
             var logLoss = new List<double>();
             var logLossRed = new List<double>();
             var topKAcc = new List<double>();
+            var allTopK = new List<double[]>();
             var perClassLogLoss = new List<double[]>();
             var counts = new List<double[]>();
             var weights = new List<double[]>();
@@ -172,6 +172,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                     logLossRed.Add(agg.UnweightedCounters.Reduction);
                     if (agg.UnweightedCounters.OutputTopKAcc > 0)
                         topKAcc.Add(agg.UnweightedCounters.TopKAccuracy);
+                    allTopK.Add(agg.UnweightedCounters.AllTopKAccuracy);
                     perClassLogLoss.Add(agg.UnweightedCounters.PerClassLogLoss);
 
                     confStratCol.AddRange(agg.UnweightedCounters.ConfusionTable.Select(x => stratColKey));
@@ -189,6 +190,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                         logLossRed.Add(agg.WeightedCounters.Reduction);
                         if (agg.WeightedCounters.OutputTopKAcc > 0)
                             topKAcc.Add(agg.WeightedCounters.TopKAccuracy);
+                        allTopK.Add(agg.WeightedCounters.AllTopKAccuracy);
                         perClassLogLoss.Add(agg.WeightedCounters.PerClassLogLoss);
                         weights.AddRange(agg.WeightedCounters.ConfusionTable);
                     }
@@ -211,6 +213,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                     overallDvBldr.AddColumn(LogLossReduction, NumberDataViewType.Double, logLossRed.ToArray());
                     if (aggregator.UnweightedCounters.OutputTopKAcc > 0)
                         overallDvBldr.AddColumn(TopKAccuracy, NumberDataViewType.Double, topKAcc.ToArray());
+                    overallDvBldr.AddColumn(AllTopKAccuracy, NumberDataViewType.Double, allTopK.ToArray());
                     overallDvBldr.AddColumn(PerClassLogLoss, aggregator.GetSlotNames, NumberDataViewType.Double, perClassLogLoss.ToArray());
 
                     var confDvBldr = new ArrayDataViewBuilder(Host);
@@ -246,9 +249,11 @@ public sealed class Counters
                 private double _totalLogLoss;
                 private double _numInstances;
                 private double _numCorrect;
-                private double _numCorrectTopK;
+                private int _numUnknownClassInstances;
                 private readonly double[] _sumWeightsOfClass;
                 private readonly double[] _totalPerClassLogLoss;
+                private readonly long[] _seenRanks;
+
                 public readonly double[][] ConfusionTable;
 
                 public double MicroAvgAccuracy { get { return _numInstances > 0 ? _numCorrect / _numInstances : 0; } }
@@ -291,7 +296,8 @@ public double Reduction
                     }
                 }
 
-                public double TopKAccuracy { get { return _numInstances > 0 ? _numCorrectTopK / _numInstances : 0; } }
+                public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value - 1] : 0d; // AllTopKAccuracy[0] is top-1 (rank 0), so top-K lives at index K - 1.
+                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
                 // in each class by the total weight of examples in that class.
@@ -316,14 +322,12 @@ public Counters(int numClasses, int? outputTopKAcc)
                     ConfusionTable = new double[numClasses][];
                     for (int i = 0; i < ConfusionTable.Length; i++)
                         ConfusionTable[i] = new double[numClasses];
+
+                    _seenRanks = new long[numClasses + 1];
                 }
 
-                public void Update(int[] indices, double loglossCurr, int label, float weight)
+                public void Update(int seenRank, int assigned, double loglossCurr, int label, float weight)
                 {
-                    Contracts.Assert(Utils.Size(indices) == _numClasses);
-
-                    int assigned = indices[0];
-
                     _numInstances += weight;
 
                     if (label < _numClasses)
@@ -334,23 +338,34 @@ public void Update(int[] indices, double loglossCurr, int label, float weight)
                     if (label < _numClasses)
                         _totalPerClassLogLoss[label] += loglossCurr * weight;
 
-                    if (assigned == label)
+                    _seenRanks[seenRank]++;
+
+                    if (seenRank == 0) //prediction matched label
                     {
                         _numCorrect += weight;
                         ConfusionTable[label][label] += weight;
-                        _numCorrectTopK += weight;
                     }
                     else if (label < _numClasses)
                     {
-                        if (OutputTopKAcc > 0)
-                        {
-                            int idx = Array.IndexOf(indices, label);
-                            if (0 <= idx && idx < OutputTopKAcc)
-                                _numCorrectTopK += weight;
-                        }
                         ConfusionTable[label][assigned] += weight;
                     }
+                    else
+                    {
+                        _numUnknownClassInstances++;
+                    }
+                }
+
+                // Lazily yields the running total of s: the i-th value is the sum of the first i + 1 elements.
+                private static IEnumerable<double> CumulativeSum(IEnumerable<double> s)
+                {
+                    double runningTotal = 0;
+                    foreach (var value in s)
+                    {
+                        runningTotal += value;
+                        yield return runningTotal;
+                    }
+                }
+
             }
 
             private ValueGetter<float> _labelGetter;
@@ -359,7 +374,6 @@ public void Update(int[] indices, double loglossCurr, int label, float weight)
 
             private VBuffer<float> _scores;
             private readonly float[] _scoresArr;
-            private int[] _indicesArr;
 
             private const float Epsilon = (float)1e-15;
 
@@ -380,6 +394,7 @@ public Aggregator(IHostEnvironment env, ReadOnlyMemory<char>[] classNames, int s
                 Host.Assert(Utils.Size(classNames) == scoreVectorSize);
 
                 _scoresArr = new float[scoreVectorSize];
+
                 UnweightedCounters = new Counters(scoreVectorSize, outputTopKAcc);
                 Weighted = weighted;
                 WeightedCounters = Weighted ? new Counters(scoreVectorSize, outputTopKAcc) : null;
@@ -400,6 +415,7 @@ internal override void InitializeNextPass(DataViewRow row, RoleMappedSchema sche
 
                 if (schema.Weight.HasValue)
                     _weightGetter = row.GetGetter<float>(schema.Weight.Value);
+
             }
 
             public override void ProcessRow()
@@ -437,16 +453,12 @@ public override void ProcessRow()
                     }
                 }
 
-                // Sort classes by prediction strength.
-                // Use stable OrderBy instead of Sort(), which may give different results on different machines.
-                if (Utils.Size(_indicesArr) < _scoresArr.Length)
-                    _indicesArr = new int[_scoresArr.Length];
-                int j = 0;
-                foreach (var index in Enumerable.Range(0, _scoresArr.Length).OrderByDescending(i => _scoresArr[i]))
-                    _indicesArr[j++] = index;
-
                 var intLabel = (int)label;
 
+                var assigned = Array.IndexOf(_scoresArr, _scoresArr.Max()); //perf could be improved
+
+                var wasKnownLabel = true;
+
                 // log-loss
                 double logloss;
                 if (intLabel < _scoresArr.Length)
@@ -461,11 +473,21 @@ public override void ProcessRow()
                     // Penalize logloss if the label was not seen during training
                     logloss = -Math.Log(Epsilon);
                     _numUnknownClassInstances++;
+                    wasKnownLabel = false;
                 }
 
-                UnweightedCounters.Update(_indicesArr, logloss, intLabel, 1);
+                // Get the probability that the CORRECT label has: (best case is that it's the highest probability):
+                var correctProba = !wasKnownLabel ? 0 : _scoresArr[intLabel];
+
+                // Find the rank of the *correct* label (in Scores[]). If 0 => Good, correct. And the lower the better.
+                // The rank will be from 0 to N. (Not N-1).
+                // Problem: What if we have probabilities that are equal to the correct prediction (eg, .6 .1 .1 .1 .1).
+                // This actually happens a lot with some models. Here we assign the worst rank in the case of a tie (so 4 in this example)
+                var correctRankWorstCase = !wasKnownLabel ? _scoresArr.Length : _scoresArr.Count(score => score >= correctProba) - 1;
+
+                UnweightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, 1);
                 if (WeightedCounters != null)
-                    WeightedCounters.Update(_indicesArr, logloss, intLabel, weight);
+                    WeightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, weight);
             }
 
             protected override List<string> GetWarningsCore()
@@ -909,6 +931,7 @@ private protected override IDataView CombineOverallMetricsCore(IDataView[] metri
             for (int i = 0; i < metrics.Length; i++)
             {
                 var idv = metrics[i];
+                idv = DropAllTopKColumn(idv);
                 if (!_outputPerClass)
                     idv = DropPerClassColumn(idv);
 
@@ -964,6 +987,15 @@ private IDataView DropPerClassColumn(IDataView input)
             return input;
         }
 
+        // Returns input without the all-K top-K accuracy column; input is returned unchanged when the column is absent.
+        private IDataView DropAllTopKColumn(IDataView input)
+        {
+            if (!input.Schema.TryGetColumnIndex(MulticlassClassificationEvaluator.AllTopKAccuracy, out _))
+                return input;
+
+            return ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.AllTopKAccuracy);
+        }
+
         public override IEnumerable<MetricColumn> GetOverallMetricColumns()
         {
             yield return new MetricColumn("AccuracyMicro", MulticlassClassificationEvaluator.AccuracyMicro);
diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
index 8aaeb53711..6404b2d290 100644
--- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
+++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -37,6 +37,7 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics
         private PredictionEngine<IrisData, IrisPrediction> _predictionEngine;
         private IrisData[][] _batches;
         private MulticlassClassificationMetrics _metrics;
+        private MulticlassClassificationEvaluator _evaluator;
 
         protected override IEnumerable<Metric> GetMetrics()
         {
@@ -118,7 +119,7 @@ public void TrainSentiment()
             _consumer.Consume(predicted);
         }
 
-        [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5) })]
+        [GlobalSetup(Targets = new string[] { nameof(PredictIris), nameof(PredictIrisBatchOf1), nameof(PredictIrisBatchOf2), nameof(PredictIrisBatchOf5), nameof(EvaluateMetrics) })]
         public void SetupPredictBenchmarks()
         {
             _trainedModel = Train(_dataPath);
@@ -142,8 +143,8 @@ public void SetupPredictBenchmarks()
 
             IDataView testData = loader.Load(_dataPath);
             IDataView scoredTestData = _trainedModel.Transform(testData);
-            var evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
-            _metrics = evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
+            _evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
+            _metrics = _evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
 
             _batches = new IrisData[_batchSizes.Length][];
             for (int i = 0; i < _batches.Length; i++)
@@ -168,6 +169,9 @@ public void SetupPredictBenchmarks()
 
         [Benchmark]
         public void PredictIrisBatchOf5() => _trainedModel.Transform(_mlContext.Data.LoadFromEnumerable(_batches[2]));
+
+        [Benchmark]
+        public void EvaluateMetrics() => _evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
     }
 
     public class IrisData

From 1747d3e4b241024a24682940ce11eaaced62778e Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Wed, 9 Sep 2020 18:26:49 -0400
Subject: [PATCH 02/28] cleanup

---
 .../SdcaMaximumEntropy.cs                     | 19 ++++-------
 docs/samples/Microsoft.ML.Samples/Program.cs  | 34 ++++++++-----------
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
index 7d358e5e35..2a83a84f3a 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs
@@ -46,7 +46,7 @@ public static void Example()
             // Create testing data. Use different random seed to make it different
             // from training data.
             var testData = mlContext.Data
-                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123, greatestLabel: 4));
+                .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
 
             // Run the model on test data set.
             var transformedTestData = model.Transform(testData);
@@ -57,7 +57,7 @@ public static void Example()
                 reuseRowObject: false).ToList();
 
             // Look at 5 predictions
-            foreach (var p in predictions.Take(100))
+            foreach (var p in predictions.Take(5))
                 Console.WriteLine($"Label: {p.Label}, " + 
                     $"Prediction: {p.PredictedLabel}");
 
@@ -70,7 +70,7 @@ public static void Example()
 
             // Evaluate the overall metrics
             var metrics = mlContext.MulticlassClassification
-                .Evaluate(transformedTestData, topKPredictionCount:3);
+                .Evaluate(transformedTestData);
 
             PrintMetrics(metrics);
             
@@ -93,7 +93,7 @@ public static void Example()
         // Generates random uniform doubles in [-0.5, 0.5)
         // range with labels 1, 2 or 3.
         private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
-            int seed=0, int greatestLabel = 4)
+            int seed=0)
 
         {
             var random = new Random(seed);
@@ -101,10 +101,10 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
             for (int i = 0; i < count; i++)
             {
                 // Generate Labels that are integers 1, 2 or 3
-                var label = random.Next(1, greatestLabel);
+                var label = random.Next(1, 4);
                 yield return new DataPoint
                 {
-                    Label = label.ToString() + "@",
+                    Label = (uint)label,
                     // Create random features that are correlated with the label.
                     // The feature values are slightly increased by adding a
                     // constant multiple of label.
@@ -119,7 +119,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
         // such examples.
         private class DataPoint
         {
-            public string Label { get; set; }
+            public uint Label { get; set; }
             [VectorType(20)]
             public float[] Features { get; set; }
         }
@@ -142,11 +142,6 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics)
             Console.WriteLine(
                 $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n");
 
-            for (int k=0; k < metrics.TopKAccuracyForAllK.Count(); k++)
-            {
-                Console.WriteLine($"Top {k} Accuracy: {metrics.TopKAccuracyForAllK[k]:F2}");
-            }
-
             Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
         }
     }
diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs
index 8f196ee860..b30e20d7cf 100644
--- a/docs/samples/Microsoft.ML.Samples/Program.cs
+++ b/docs/samples/Microsoft.ML.Samples/Program.cs
@@ -1,7 +1,6 @@
 using System;
 using System.Reflection;
 using Samples.Dynamic;
-using Samples.Dynamic.Trainers.MulticlassClassification;
 
 namespace Microsoft.ML.Samples
 {
@@ -9,28 +8,25 @@ public static class Program
     {
         public static void Main(string[] args) => RunAll(args == null || args.Length == 0 ? null : args[0]);
 
-
         internal static void RunAll(string name = null)
         {
-            //int samples = 0;
-            //foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
-            //{
-            //    if (name == null || name.Equals(type.Name))
-            //    {
-            //        var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy);
-
-            //        if (sample != null)
-            //        {
-            //            Console.WriteLine(type.Name);
-            //            sample.Invoke(null, null);
-            //            samples++;
-            //        }
-            //    }
-            //}
+            int samples = 0;
+            foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
+            {
+                if (name == null || name.Equals(type.Name))
+                {
+                    var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy);
 
-            SdcaMaximumEntropy.Example();
+                    if (sample != null)
+                    {
+                        Console.WriteLine(type.Name);
+                        sample.Invoke(null, null);
+                        samples++;
+                    }
+                }
+            }
 
-            Console.WriteLine("Number of samples that ran without any exception: ");
+            Console.WriteLine("Number of samples that ran without any exception: " + samples);
         }
     }
 }

From 32c244aac249180a11454538e8fa4373722b13ce Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Sat, 12 Sep 2020 14:01:02 -0400
Subject: [PATCH 03/28] fixing report output

---
 .../Evaluators/MulticlassClassificationEvaluator.cs         | 6 +++++-
 .../WE-Average-TrainTest-iris-out.txt                       | 4 ++++
 .../WE-Bootstrap-TrainTest-iris-out.txt                     | 4 ++++
 .../WE-SDCA-Average-TrainTest-iris-out.txt                  | 4 ++++
 .../WE-Stacking-TrainTest-iris-out.txt                      | 4 ++++
 .../WE-Voting-TrainTest-iris-out.txt                        | 4 ++++
 6 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index abac0cdef9..413ee020b6 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -213,9 +213,13 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                     overallDvBldr.AddColumn(LogLossReduction, NumberDataViewType.Double, logLossRed.ToArray());
                     if (aggregator.UnweightedCounters.OutputTopKAcc > 0)
                         overallDvBldr.AddColumn(TopKAccuracy, NumberDataViewType.Double, topKAcc.ToArray());
-                    overallDvBldr.AddColumn(AllTopKAccuracy, NumberDataViewType.Double, allTopK.ToArray());
                     overallDvBldr.AddColumn(PerClassLogLoss, aggregator.GetSlotNames, NumberDataViewType.Double, perClassLogLoss.ToArray());
 
+                    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
+                        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
+                            dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1,allTopK.First().Length).Select(i=>new ReadOnlyMemory<char>(($"K={i.ToString()}").ToCharArray())).ToArray());
+                    overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());
+
                     var confDvBldr = new ArrayDataViewBuilder(Host);
                     if (hasStrats)
                     {
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
index 5e299a5d5c..888067df15 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
@@ -46,6 +46,10 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.433374
 Log-loss reduction: 0.605526
+Top K accuracy(All K)K=1: 0.946667
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
index 1446808e2b..eb3d695b74 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
@@ -136,6 +136,10 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.434962
 Log-loss reduction: 0.604081
+Top K accuracy(All K)K=1: 0.946667
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
index a6c93db8f3..86e84cf3b6 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
@@ -56,6 +56,10 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.061647
 Log-loss reduction: 0.943887
+Top K accuracy(All K)K=1: 0.980000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
index 99e556e559..7761b303dc 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
@@ -52,6 +52,10 @@ Accuracy(micro-avg): 0.900000
 Accuracy(macro-avg): 0.900000
 Log-loss:           0.431088
 Log-loss reduction: 0.607607
+Top K accuracy(All K)K=1: 0.900000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
index 94766eed5c..068f262ef8 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
@@ -46,6 +46,10 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.511576
 Log-loss reduction: 0.534344
+Top K accuracy(All K)K=1: 0.946667
+Top K accuracy(All K)K=2: 0.986667
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------

From 968b58d7b97b4cf037129dbb4b5c680bab2faee2 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Sat, 12 Sep 2020 14:22:20 -0400
Subject: [PATCH 04/28] Fix test reference outputs

---
 .../Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt      | 8 ++++++++
 .../Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt  | 8 ++++++++
 .../LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt      | 4 ++++
 .../LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt  | 4 ++++
 .../MultiClassNaiveBayes-CV-breast-cancer-out.txt         | 6 ++++++
 .../MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt  | 3 +++
 .../LogisticRegression-Non-Negative-CV-iris-out.txt       | 8 ++++++++
 ...LogisticRegression-Non-Negative-TrainTest-iris-out.txt | 4 ++++
 .../LogisticRegression-Non-Negative-TrainTest-iris-rp.txt | 2 +-
 .../MulticlassLogisticRegression-CV-iris-out.txt          | 8 ++++++++
 ...lassLogisticRegression-CV-iris-tree-featurized-out.txt | 8 ++++++++
 ...ticRegression-CV-iris-tree-featurized-permuted-out.txt | 8 ++++++++
 .../MulticlassLogisticRegression-TrainTest-iris-out.txt   | 4 ++++
 ...isticRegression-TrainTest-iris-tree-featurized-out.txt | 4 ++++
 ...ession-TrainTest-iris-tree-featurized-permuted-out.txt | 4 ++++
 test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt        | 8 ++++++++
 .../Common/OVA/OVA-FastForest-CV-iris-out.txt             | 8 ++++++++
 .../Common/OVA/OVA-FastForest-TrainTest-iris-out.txt      | 4 ++++
 test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt | 4 ++++
 test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt      | 8 ++++++++
 .../Common/PKPD/PKPD-TrainTest-iris-out.txt               | 4 ++++
 21 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
index cfe6101e4a..f2af91addf 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
@@ -23,6 +23,10 @@ Accuracy(micro-avg): 0.936709
 Accuracy(macro-avg): 0.942857
 Log-loss:           0.285741
 Log-loss reduction: 0.737254
+Top K accuracy(All K)K=1: 0.936709
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -37,6 +41,10 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.160970
 Log-loss reduction: 0.851729
+Top K accuracy(All K)K=1: 0.957746
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
index 0f8b13a0c5..b1a55bcb0d 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
@@ -23,6 +23,10 @@ Accuracy(micro-avg): 0.936709
 Accuracy(macro-avg): 0.942857
 Log-loss:           0.285741
 Log-loss reduction: 0.737254
+Top K accuracy(All K)K=1: 0.936709
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -37,6 +41,10 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.160970
 Log-loss reduction: 0.851729
+Top K accuracy(All K)K=1: 0.957746
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
index 880c8fa977..857003cbf3 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
@@ -18,6 +18,10 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.161048
 Log-loss reduction: 0.853408
+Top K accuracy(All K)K=1: 0.973333
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
index 7085992a17..6f242a10ae 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
@@ -18,6 +18,10 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.161048
 Log-loss reduction: 0.853408
+Top K accuracy(All K)K=1: 0.973333
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
index 54e16b5640..301032b3ea 100644
--- a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
+++ b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
@@ -16,6 +16,9 @@ Accuracy(micro-avg): 0.629834
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -51.407404
+Top K accuracy(All K)K=1: 0.629834
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
 
 Confusion table
           ||======================
@@ -29,6 +32,9 @@ Accuracy(micro-avg): 0.682493
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -54.264136
+Top K accuracy(All K)K=1: 0.682493
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
index eb0275e175..53ca634eeb 100644
--- a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
+++ b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
@@ -14,6 +14,9 @@ Accuracy(micro-avg): 0.655222
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -52.618809
+Top K accuracy(All K)K=1: 0.655222
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
index c8e2526951..9685541e34 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
@@ -25,6 +25,10 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.129858
 Log-loss reduction: 0.880592
+Top K accuracy(All K)K=1: 0.962025
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -39,6 +43,10 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.125563
 Log-loss reduction: 0.884343
+Top K accuracy(All K)K=1: 0.971831
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
index 58e1f89e5b..93ec97e28c 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
@@ -19,6 +19,10 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.095534
 Log-loss reduction: 0.913041
+Top K accuracy(All K)K=1: 0.980000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
index 6e2c6ad943..e9489fa884 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
@@ -1,4 +1,4 @@
 MulticlassLogisticRegression
 Accuracy(micro-avg)	Accuracy(macro-avg)	Log-loss	Log-loss reduction	/l2	/l1	/ot	/nt	/nn	Learner Name	Train Dataset	Test Dataset	Results File	Run Time	Physical Memory	Virtual Memory	Command Line	Settings	
-0.98	0.98	0.095534	0.913042	0.1	0.001	0.001	1	+	MulticlassLogisticRegression	%Data%	%Data%	%Output%	99	0	0	maml.exe TrainTest test=%Data% tr=MulticlassLogisticRegression{l1=0.001 l2=0.1 ot=1e-3 nt=1 nn=+} norm=No dout=%Output% data=%Data% out=%Output% seed=1 xf=Term{col=Label}	/l2:0.1;/l1:0.001;/ot:0.001;/nt:1;/nn:+	
+0.98	0.98	0.095534	0.913041	0.1	0.001	0.001	1	+	MulticlassLogisticRegression	%Data%	%Data%	%Output%	99	0	0	maml.exe TrainTest test=%Data% tr=MulticlassLogisticRegression{l1=0.001 l2=0.1 ot=1e-3 nt=1 nn=+} norm=No dout=%Output% data=%Data% out=%Output% seed=1 xf=Term{col=Label}	/l2:0.1;/l1:0.001;/ot:0.001;/nt:1;/nn:+	
 
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
index 99b7a4a51c..b766b0ba55 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
@@ -25,6 +25,10 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.101866
 Log-loss reduction: 0.906331
+Top K accuracy(All K)K=1: 0.962025
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -39,6 +43,10 @@ Accuracy(micro-avg): 0.985915
 Accuracy(macro-avg): 0.983333
 Log-loss:           0.075812
 Log-loss reduction: 0.930169
+Top K accuracy(All K)K=1: 0.985915
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
index 3b5ff1562d..0533130117 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
@@ -39,6 +39,10 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.330649
 Log-loss reduction: 0.695959
+Top K accuracy(All K)K=1: 0.924051
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -53,6 +57,10 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.157832
 Log-loss reduction: 0.854620
+Top K accuracy(All K)K=1: 0.957746
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
index c11eacd20b..4553c97f03 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
@@ -39,6 +39,10 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.201590
 Log-loss reduction: 0.814633
+Top K accuracy(All K)K=1: 0.924051
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -53,6 +57,10 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.968182
 Log-loss:           0.101915
 Log-loss reduction: 0.906125
+Top K accuracy(All K)K=1: 0.971831
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
index 248e2dc6c2..4bd98f6839 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
@@ -19,6 +19,10 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.072218
 Log-loss reduction: 0.934264
+Top K accuracy(All K)K=1: 0.980000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
index 16e312ae3a..847816539e 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
@@ -26,6 +26,10 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.048652
 Log-loss reduction: 0.955715
+Top K accuracy(All K)K=1: 0.980000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
index 3c21d28d0b..511ca9fd0b 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
@@ -26,6 +26,10 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.052580
 Log-loss reduction: 0.952140
+Top K accuracy(All K)K=1: 0.973333
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
index 0469c9469e..6d61606b73 100644
--- a/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
@@ -29,6 +29,10 @@ Accuracy(micro-avg): 0.974684
 Accuracy(macro-avg): 0.977778
 Log-loss:           0.352944
 Log-loss reduction: 0.675458
+Top K accuracy(All K)K=1: 0.974684
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -43,6 +47,10 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.273754
 Log-loss reduction: 0.747843
+Top K accuracy(All K)K=1: 0.971831
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
index 5100848835..6c673f917a 100644
--- a/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
@@ -71,6 +71,10 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.197783
 Log-loss reduction: 0.818133
+Top K accuracy(All K)K=1: 0.924051
+Top K accuracy(All K)K=2: 0.987342
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -85,6 +89,10 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.103360
 Log-loss reduction: 0.904794
+Top K accuracy(All K)K=1: 0.957746
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
index e988ef026c..21c3b24263 100644
--- a/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
@@ -42,6 +42,10 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.088201
 Log-loss reduction: 0.919716
+Top K accuracy(All K)K=1: 0.973333
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
index a78f6c8f27..f4e290013b 100644
--- a/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
@@ -21,6 +21,10 @@ Accuracy(micro-avg): 0.960000
 Accuracy(macro-avg): 0.960000
 Log-loss:           0.254771
 Log-loss reduction: 0.768097
+Top K accuracy(All K)K=1: 0.960000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt b/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
index 21e6eee190..849b1d8cfd 100644
--- a/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
@@ -41,6 +41,10 @@ Accuracy(micro-avg): 0.974684
 Accuracy(macro-avg): 0.977778
 Log-loss:           0.359335
 Log-loss reduction: 0.669582
+Top K accuracy(All K)K=1: 0.974684
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -55,6 +59,10 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.277823
 Log-loss reduction: 0.744095
+Top K accuracy(All K)K=1: 0.971831
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt b/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
index c509772806..a5bca1aaa6 100644
--- a/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
@@ -27,6 +27,10 @@ Accuracy(micro-avg): 0.960000
 Accuracy(macro-avg): 0.960000
 Log-loss:           0.255665
 Log-loss reduction: 0.767284
+Top K accuracy(All K)K=1: 0.960000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------

From b7ded43a5aa24057cae4dbede258ed39c757539a Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Sat, 12 Sep 2020 16:53:08 -0400
Subject: [PATCH 05/28] Fixed test reference outputs for NetCore31

---
 .../MulticlassLogisticRegression-CV-iris-out.txt          | 8 ++++++++
 .../netcoreapp31/WE-Average-TrainTest-iris-out.txt        | 4 ++++
 .../netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt      | 4 ++++
 .../netcoreapp31/WE-Stacking-TrainTest-iris-out.txt       | 4 ++++
 .../netcoreapp31/WE-Voting-TrainTest-iris-out.txt         | 4 ++++
 5 files changed, 24 insertions(+)

diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
index 6f8b64ff3c..04fb413c9e 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
@@ -25,6 +25,10 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.101866
 Log-loss reduction: 0.906331
+Top K accuracy(All K)K=1: 0.962025
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -39,6 +43,10 @@ Accuracy(micro-avg): 0.985915
 Accuracy(macro-avg): 0.983333
 Log-loss:           0.075796
 Log-loss reduction: 0.930183
+Top K accuracy(All K)K=1: 0.985915
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
index f0bf9e77bf..25e12386b9 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
@@ -46,6 +46,10 @@ Accuracy(micro-avg): 0.940000
 Accuracy(macro-avg): 0.940000
 Log-loss:           0.433907
 Log-loss reduction: 0.605040
+Top K accuracy(All K)K=1: 0.940000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
index 2da2bb56f1..97f32d2c65 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
@@ -136,6 +136,10 @@ Accuracy(micro-avg): 0.940000
 Accuracy(macro-avg): 0.940000
 Log-loss:           0.435480
 Log-loss reduction: 0.603609
+Top K accuracy(All K)K=1: 0.940000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
index 3d92491bd9..e0678dcaa0 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
@@ -52,6 +52,10 @@ Accuracy(micro-avg): 0.900000
 Accuracy(macro-avg): 0.900000
 Log-loss:           0.431192
 Log-loss reduction: 0.607512
+Top K accuracy(All K)K=1: 0.900000
+Top K accuracy(All K)K=2: 1.000000
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
index cec0c9958b..f4882c6eed 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
@@ -46,6 +46,10 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.511576
 Log-loss reduction: 0.534344
+Top K accuracy(All K)K=1: 0.946667
+Top K accuracy(All K)K=2: 0.986667
+Top K accuracy(All K)K=3: 1.000000
+Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------

From 685eeb4ef3908288dc2fe127699fd884d1194a28 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 5 Nov 2020 14:51:08 -0500
Subject: [PATCH 06/28] Change top-K accuracy output string format

---
 .../Evaluators/MulticlassClassificationEvaluator.cs           | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 413ee020b6..753bed6b9a 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -41,7 +41,7 @@ public sealed class Arguments
         public const string AccuracyMicro = "Accuracy(micro-avg)";
         public const string AccuracyMacro = "Accuracy(macro-avg)";
         public const string TopKAccuracy = "Top K accuracy";
-        public const string AllTopKAccuracy = "Top K accuracy(All K)";
+        public const string AllTopKAccuracy = "Top K accuracy";
         public const string PerClassLogLoss = "Per class log-loss";
         public const string LogLoss = "Log-loss";
         public const string LogLossReduction = "Log-loss reduction";
@@ -217,7 +217,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
 
                     ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
                         (ref VBuffer<ReadOnlyMemory<char>> dst) =>
-                            dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1,allTopK.First().Length).Select(i=>new ReadOnlyMemory<char>(($"K={i.ToString()}").ToCharArray())).ToArray());
+                            dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1,allTopK.First().Length).Select(i=>new ReadOnlyMemory<char>(($"@K={i.ToString()}").ToCharArray())).ToArray());
                     overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());
 
                     var confDvBldr = new ArrayDataViewBuilder(Host);

From 1eacec74f512498ac3656a3a9e2a58f8eb713d0a Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 5 Nov 2020 19:42:06 -0500
Subject: [PATCH 07/28] Ranking algorithm now uses first appearance in dataset
 rather than worstCase

---
 .../MulticlassClassificationEvaluator.cs      | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 753bed6b9a..809a5fb8c8 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -459,8 +459,6 @@ public override void ProcessRow()
 
                 var intLabel = (int)label;
 
-                var assigned = Array.IndexOf(_scoresArr, _scoresArr.Max()); //perf could be improved
-
                 var wasKnownLabel = true;
 
                 // log-loss
@@ -485,13 +483,24 @@ public override void ProcessRow()
 
                 // Find the rank of the *correct* label (in Scores[]). If 0 => Good, correct. And the lower the better.
                 // The rank will be from 0 to N. (Not N-1).
-                // Problem: What if we have probabilities that are equal to the correct prediction (eg, .6 .1 .1 .1 .1).
-                // This actually happens a lot with some models. Here we assign the worst rank in the case of a tie (so 4 in this example)
-                var correctRankWorstCase = !wasKnownLabel ? _scoresArr.Length : _scoresArr.Count(score => score >= correctProba) - 1;
+                // Problem: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
+                // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
+                int rankofCorrectLabel = 0;
+                //float highestProb = 0;
+                int assigned = -1;
+                for (int i=0; i < _scoresArr.Length; i++)
+                {
+                    if ( _scoresArr[i] > correctProba || (_scoresArr[i] == correctProba && i < intLabel))
+                        rankofCorrectLabel++;
+
+                    //This is the assigned "prediction" of the model if it has the highest probability.
+                    if (assigned < 0 || _scoresArr[assigned] < _scoresArr[i] )
+                        assigned = i;
+                }
 
-                UnweightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, 1);
+                UnweightedCounters.Update(rankofCorrectLabel, assigned, logloss, intLabel, 1);
                 if (WeightedCounters != null)
-                    WeightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, weight);
+                    WeightedCounters.Update(rankofCorrectLabel, assigned, logloss, intLabel, weight);
             }
 
             protected override List<string> GetWarningsCore()

From ea057ff292826e85d5a8bd203bcdc79f141e1338 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 5 Nov 2020 20:05:14 -0500
Subject: [PATCH 08/28] fixed benchmark

---
 .../StochasticDualCoordinateAscentClassifierBench.cs       | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
index 6404b2d290..cf0667784c 100644
--- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
+++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -38,6 +38,7 @@ public class StochasticDualCoordinateAscentClassifierBench : WithExtraMetrics
         private IrisData[][] _batches;
         private MulticlassClassificationMetrics _metrics;
         private MulticlassClassificationEvaluator _evaluator;
+        private IDataView _scoredIrisTestData;
 
         protected override IEnumerable<Metric> GetMetrics()
         {
@@ -142,9 +143,9 @@ public void SetupPredictBenchmarks()
             var loader = new TextLoader(_mlContext, options: options);
 
             IDataView testData = loader.Load(_dataPath);
-            IDataView scoredTestData = _trainedModel.Transform(testData);
+            _scoredIrisTestData = _trainedModel.Transform(testData);
             _evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
-            _metrics = _evaluator.Evaluate(scoredTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
+            _metrics = _evaluator.Evaluate(_scoredIrisTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
 
             _batches = new IrisData[_batchSizes.Length][];
             for (int i = 0; i < _batches.Length; i++)
@@ -171,7 +172,7 @@ public void SetupPredictBenchmarks()
         public void PredictIrisBatchOf5() => _trainedModel.Transform(_mlContext.Data.LoadFromEnumerable(_batches[2]));
 
         [Benchmark]
-        public void EvaluateMetrics() => _evaluator = new MulticlassClassificationEvaluator(_mlContext, new MulticlassClassificationEvaluator.Arguments());
+        public void EvaluateMetrics() => _evaluator.Evaluate(_scoredIrisTestData, DefaultColumnNames.Label, DefaultColumnNames.Score, DefaultColumnNames.PredictedLabel);
     }
 
     public class IrisData

From ac08554f7122ccfe2fe8603656f856502c29174c Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 5 Nov 2020 22:02:21 -0500
Subject: [PATCH 09/28] various minor changes from code review

---
 .../MulticlassClassificationEvaluator.cs      | 30 ++++++++++++-------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 809a5fb8c8..6dfd7079f4 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -171,8 +171,10 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                     logLoss.Add(agg.UnweightedCounters.LogLoss);
                     logLossRed.Add(agg.UnweightedCounters.Reduction);
                     if (agg.UnweightedCounters.OutputTopKAcc > 0)
+                    {
                         topKAcc.Add(agg.UnweightedCounters.TopKAccuracy);
-                    allTopK.Add(agg.UnweightedCounters.AllTopKAccuracy);
+                        allTopK.Add(agg.UnweightedCounters.AllTopKAccuracy);
+                    }
                     perClassLogLoss.Add(agg.UnweightedCounters.PerClassLogLoss);
 
                     confStratCol.AddRange(agg.UnweightedCounters.ConfusionTable.Select(x => stratColKey));
@@ -189,8 +191,10 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                         logLoss.Add(agg.WeightedCounters.LogLoss);
                         logLossRed.Add(agg.WeightedCounters.Reduction);
                         if (agg.WeightedCounters.OutputTopKAcc > 0)
+                        {
                             topKAcc.Add(agg.WeightedCounters.TopKAccuracy);
-                        allTopK.Add(agg.WeightedCounters.AllTopKAccuracy);
+                            allTopK.Add(agg.WeightedCounters.AllTopKAccuracy);
+                        }
                         perClassLogLoss.Add(agg.WeightedCounters.PerClassLogLoss);
                         weights.AddRange(agg.WeightedCounters.ConfusionTable);
                     }
@@ -212,13 +216,16 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
                     overallDvBldr.AddColumn(LogLoss, NumberDataViewType.Double, logLoss.ToArray());
                     overallDvBldr.AddColumn(LogLossReduction, NumberDataViewType.Double, logLossRed.ToArray());
                     if (aggregator.UnweightedCounters.OutputTopKAcc > 0)
+                    {
                         overallDvBldr.AddColumn(TopKAccuracy, NumberDataViewType.Double, topKAcc.ToArray());
-                    overallDvBldr.AddColumn(PerClassLogLoss, aggregator.GetSlotNames, NumberDataViewType.Double, perClassLogLoss.ToArray());
 
-                    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
-                        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
-                            dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1,allTopK.First().Length).Select(i=>new ReadOnlyMemory<char>(($"@K={i.ToString()}").ToCharArray())).ToArray());
-                    overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());
+                        ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
+                            (ref VBuffer<ReadOnlyMemory<char>> dst) =>
+                                dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1, allTopK.First().Length).Select(i => new ReadOnlyMemory<char>(($"@K={i.ToString()}").ToCharArray())).ToArray());
+                        overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());
+                    }
+
+                    overallDvBldr.AddColumn(PerClassLogLoss, aggregator.GetSlotNames, NumberDataViewType.Double, perClassLogLoss.ToArray());
 
                     var confDvBldr = new ArrayDataViewBuilder(Host);
                     if (hasStrats)
@@ -362,7 +369,7 @@ public void Update(int seenRank, int assigned, double loglossCurr, int label, fl
                 private static IEnumerable<double> CumulativeSum(IEnumerable<double> s)
                 {
                     double sum = 0;
-                    ;
+
                     foreach (var x in s)
                     {
                         sum += x;
@@ -481,9 +488,9 @@ public override void ProcessRow()
                 // Get the probability that the CORRECT label has: (best case is that it's the highest probability):
                 var correctProba = !wasKnownLabel ? 0 : _scoresArr[intLabel];
 
-                // Find the rank of the *correct* label (in Scores[]). If 0 => Good, correct. And the lower the better.
-                // The rank will be from 0 to N. (Not N-1).
-                // Problem: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
+                // Find the rank of the *correct* label (in _scoresArr[]). If the correct (ground truth) label gets rank 0, it means the model assigned it the highest probability (that's ideal). Rank 1 would mean our model gives the real label the 2nd highest probability, etc.
+                // The rank will be from 0 to N. (Not N-1). Rank N is used for unrecognized values.
+                // Situation: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
                 // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
                 int rankofCorrectLabel = 0;
                 //float highestProb = 0;
@@ -1022,6 +1029,7 @@ public override IEnumerable<MetricColumn> GetOverallMetricColumns()
             }
             yield return new MetricColumn("LogLoss", MulticlassClassificationEvaluator.LogLoss, MetricColumn.Objective.Minimize);
             yield return new MetricColumn("LogLossReduction", MulticlassClassificationEvaluator.LogLossReduction);
+            yield return new MetricColumn("TopKAccuracyForAllK", MulticlassClassificationEvaluator.AllTopKAccuracy, isVector: true);
         }
 
         private protected override IEnumerable<string> GetPerInstanceColumnsToSave(RoleMappedSchema schema)

From f0de3ea5bd3bed982a1c3d4fef767b52cbd944e8 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 5 Nov 2020 22:16:26 -0500
Subject: [PATCH 10/28] limit TopK to OutputTopKAcc parameter

---
 .../Evaluators/MulticlassClassificationEvaluator.cs             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 6dfd7079f4..88fac1e8d1 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -308,7 +308,7 @@ public double Reduction
                 }
 
                 public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value] : 0d;
-                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
+                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
                 // in each class by the total weight of examples in that class.

From 30fbd6f411caad572739e9371a48e0baf4f7d5ea Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 12:01:04 -0500
Subject: [PATCH 11/28] top k output name changes

---
 .../Evaluators/EvaluatorUtils.cs              |  8 ++-
 .../MulticlassClassificationEvaluator.cs      | 49 +++++++++----------
 2 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
index 07bec9516e..bf9d9a95c8 100644
--- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
+++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
@@ -1035,7 +1035,13 @@ private static List<string> GetMetricNames(IChannel ch, DataViewSchema schema, D
                         names = editor.Commit();
                     }
                     foreach (var name in names.Items(all: true))
-                        metricNames.Add(string.Format("{0}{1}", metricName, name.Value));
+                    {
+                        var tryNaming = string.Format(metricName, name.Value);
+                        if (tryNaming == metricName) //metricName wasn't a format string, so just append slotname
+                            tryNaming = (string.Format("{0}{1}", metricName, name.Value));
+
+                        metricNames.Add(tryNaming);
+                    }
                 }
             }
             ch.Assert(metricNames.Count == metricCount);
diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 88fac1e8d1..f080329cd7 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -41,7 +41,7 @@ public sealed class Arguments
         public const string AccuracyMicro = "Accuracy(micro-avg)";
         public const string AccuracyMacro = "Accuracy(macro-avg)";
         public const string TopKAccuracy = "Top K accuracy";
-        public const string AllTopKAccuracy = "Top K accuracy";
+        public const string AllTopKAccuracy = "Top K accuracies";
         public const string PerClassLogLoss = "Per class log-loss";
         public const string LogLoss = "Log-loss";
         public const string LogLossReduction = "Log-loss reduction";
@@ -221,7 +221,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
 
                         ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
                             (ref VBuffer<ReadOnlyMemory<char>> dst) =>
-                                dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1, allTopK.First().Length).Select(i => new ReadOnlyMemory<char>(($"@K={i.ToString()}").ToCharArray())).ToArray());
+                                dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1, allTopK.First().Length).Select(i => new ReadOnlyMemory<char>(i.ToString().ToCharArray())).ToArray());
                         overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());
                     }
 
@@ -307,7 +307,7 @@ public double Reduction
                     }
                 }
 
-                public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value] : 0d;
+                public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value-1] : 0d;
                 public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
@@ -924,13 +924,16 @@ private protected override void PrintFoldResultsCore(IChannel ch, Dictionary<str
             if (!metrics.TryGetValue(MetricKinds.ConfusionMatrix, out IDataView conf))
                 throw ch.Except("No confusion matrix found");
 
-            // Change the name of the Top-k-accuracy column.
+            // Change the name of the Top-k-accuracies collection column & remove redundant old TopK output
             if (_outputTopKAcc != null)
-                fold = ChangeTopKAccColumnName(fold);
+            {
+                fold = ChangeAllTopKAccColumnName(fold);
+                fold = DropColumn(fold, MulticlassClassificationEvaluator.TopKAccuracy);
+            }
 
             // Drop the per-class information.
             if (!_outputPerClass)
-                fold = DropPerClassColumn(fold);
+                fold = DropColumn(fold, MulticlassClassificationEvaluator.PerClassLogLoss);
 
             var unweightedConf = MetricWriter.GetConfusionTableAsFormattedString(Host, conf, out string weightedConf, false, _numConfusionTableClasses);
             var unweightedFold = MetricWriter.GetPerFoldResults(Host, fold, out string weightedFold);
@@ -951,9 +954,16 @@ private protected override IDataView CombineOverallMetricsCore(IDataView[] metri
             for (int i = 0; i < metrics.Length; i++)
             {
                 var idv = metrics[i];
-                idv = DropAllTopKColumn(idv);
+
+                // Change the name of the Top-k-accuracies collection column & remove redundant old TopK output
+                if (_outputTopKAcc != null)
+                {
+                    idv = ChangeAllTopKAccColumnName(idv);
+                    idv = DropColumn(idv, MulticlassClassificationEvaluator.TopKAccuracy);
+                }
+
                 if (!_outputPerClass)
-                    idv = DropPerClassColumn(idv);
+                    idv = DropColumn(idv, MulticlassClassificationEvaluator.PerClassLogLoss);
 
                 overallList.Add(idv);
             }
@@ -984,34 +994,23 @@ private protected override IDataView CombineOverallMetricsCore(IDataView[] metri
             return base.CombineOverallMetricsCore(views);
         }
 
-        private protected override IDataView GetOverallResultsCore(IDataView overall)
-        {
-            // Change the name of the Top-k-accuracy column.
-            if (_outputTopKAcc != null)
-                overall = ChangeTopKAccColumnName(overall);
-            return overall;
-        }
-
         private IDataView ChangeTopKAccColumnName(IDataView input)
         {
             input = new ColumnCopyingTransformer(Host, (string.Format(TopKAccuracyFormat, _outputTopKAcc), MulticlassClassificationEvaluator.TopKAccuracy)).Transform(input);
             return ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.TopKAccuracy);
         }
 
-        private IDataView DropPerClassColumn(IDataView input)
+        private IDataView ChangeAllTopKAccColumnName(IDataView input)
         {
-            if (input.Schema.TryGetColumnIndex(MulticlassClassificationEvaluator.PerClassLogLoss, out int perClassCol))
-            {
-                input = ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.PerClassLogLoss);
-            }
-            return input;
+            input = new ColumnCopyingTransformer(Host, (TopKAccuracyFormat, MulticlassClassificationEvaluator.AllTopKAccuracy)).Transform(input);
+            return ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.AllTopKAccuracy);
         }
 
-        private IDataView DropAllTopKColumn(IDataView input)
+        private IDataView DropColumn(IDataView input, string columnToDrop)
         {
-            if (input.Schema.TryGetColumnIndex(MulticlassClassificationEvaluator.AllTopKAccuracy, out int AllTopKCol))
+            if (input.Schema.TryGetColumnIndex(columnToDrop, out int ColInd))
             {
-                input = ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.AllTopKAccuracy);
+                input = ColumnSelectingTransformer.CreateDrop(Host, input, columnToDrop);
             }
             return input;
         }

From 495b4b03fdccc927ce702de57728d2a54b9d16c3 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 15:27:39 -0500
Subject: [PATCH 12/28] make old TopK readOnly

---
 .../Runners/CrossValSummaryRunner.cs          |  8 ++++---
 .../MulticlassClassificationMetrics.cs        | 23 ++++++++++---------
 .../PermutationFeatureImportanceExtensions.cs |  3 ++-
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
index 3697268936..fc911f52f6 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
@@ -8,6 +8,7 @@
 using System.Linq;
 using Microsoft.ML.Data;
 using Microsoft.ML.Runtime;
+using Microsoft.ML.Trainers.FastTree;
 
 namespace Microsoft.ML.AutoML
 {
@@ -123,11 +124,12 @@ private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetric
                     logLoss: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLoss)),
                     logLossReduction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLossReduction)),
                     topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
-                    topKAccuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.TopKAccuracy)),
-                    // Return PerClassLogLoss and ConfusionMatrix from the fold closest to average score
+                    //need to "transpose"/rotate this array of arrays so we can average across all top 0, top 1, top 2, etc
+                    topKAccuracies: newMetrics.SelectMany(nm => nm.TopKAccuracyForAllK.Select((double tk, int i) => (i, tk))).ToLookup(itk => itk.i, itk => itk.tk).Select(tk => GetAverageOfNonNaNScores(tk)).ToArray(),
                     perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
-                    confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix);
+                    confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix) ;
                 return result as TMetrics;
+
             }
 
             if (typeof(TMetrics) == typeof(RegressionMetrics))
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index 367d92385b..f06f943027 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System;
 using System.Collections.Generic;
 using System.Collections.Immutable;
 using System.Linq;
@@ -72,18 +73,18 @@ public sealed class MulticlassClassificationMetrics
         public double MicroAccuracy { get; }
 
         /// <summary>
-        /// If <see cref="TopKPredictionCount"/> is positive, this is the relative number of examples where
+        /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive, this is the relative number of examples where
         /// the true label is one of the top-k predicted labels by the predictor.
         /// </summary>
-        public double TopKAccuracy { get; }
+        public double TopKAccuracy => TopKAccuracyForAllK?.LastOrDefault() ?? 0;
 
         /// <summary>
-        /// If positive, this indicates the K in <see cref="TopKAccuracy"/>.
+        /// If positive, this indicates the K in <see cref="TopKAccuracy"/> and <see cref="TopKAccuracyForAllK"/>.
         /// </summary>
         public int TopKPredictionCount { get; }
 
         /// <summary>
-        /// Returns the top K for all K from 1 to the number of classes
+        /// Returns the top K accuracy for all K from 1 to the value of TopKPredictionCount.
         /// </summary>
         public IReadOnlyList<double> TopKAccuracyForAllK { get; }
 
@@ -120,10 +121,10 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
             MacroAccuracy = FetchDouble(MulticlassClassificationEvaluator.AccuracyMacro);
             LogLoss = FetchDouble(MulticlassClassificationEvaluator.LogLoss);
             LogLossReduction = FetchDouble(MulticlassClassificationEvaluator.LogLossReduction);
-            TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
+            if (TopKPredictionCount > 0)
+                TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
+
             TopKPredictionCount = topKPredictionCount;
-            if (topKPredictionCount > 0)
-                TopKAccuracy = TopKAccuracyForAllK[topKPredictionCount-1];
 
             var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
             PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
@@ -131,20 +132,20 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
         }
 
         internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
-            int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss)
+            int topKPredictionCount, double[] topKAccuracies, double[] perClassLogLoss)
         {
             MicroAccuracy = accuracyMicro;
             MacroAccuracy = accuracyMacro;
             LogLoss = logLoss;
             LogLossReduction = logLossReduction;
             TopKPredictionCount = topKPredictionCount;
-            TopKAccuracy = topKAccuracy;
+            TopKAccuracyForAllK = topKAccuracies;
             PerClassLogLoss = perClassLogLoss.ToImmutableArray();
         }
 
         internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
-            int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
-            : this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracy, perClassLogLoss)
+            int topKPredictionCount, double[] topKAccuracies, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
+            : this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracies, perClassLogLoss)
         {
             ConfusionMatrix = confusionMatrix;
         }
diff --git a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
index 02e8832f0e..8be82d7fde 100644
--- a/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
+++ b/src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs
@@ -4,6 +4,7 @@
 
 using System.Collections.Generic;
 using System.Collections.Immutable;
+using System.Linq;
 using Microsoft.ML.Data;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Transforms;
@@ -251,7 +252,7 @@ private static MulticlassClassificationMetrics MulticlassClassificationDelta(
                 logLoss: a.LogLoss - b.LogLoss,
                 logLossReduction: a.LogLossReduction - b.LogLossReduction,
                 topKPredictionCount: a.TopKPredictionCount,
-                topKAccuracy: a.TopKAccuracy - b.TopKAccuracy,
+                topKAccuracies: a?.TopKAccuracyForAllK?.Zip(b.TopKAccuracyForAllK, (a,b)=>a-b)?.ToArray(),
                 perClassLogLoss: perClassLogLoss
                 );
         }

From c3afe152f2a1cc29b38ca77c4b71fe9cade35fd4 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 15:31:50 -0500
Subject: [PATCH 13/28] restored old baselineOutputs since respecting
 outputTopK param means no topK in most test output

---
 .../Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt      | 8 --------
 .../Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt  | 8 --------
 .../LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt      | 4 ----
 .../LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt  | 4 ----
 .../MultiClassNaiveBayes-CV-breast-cancer-out.txt         | 6 ------
 .../MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt  | 3 ---
 .../LogisticRegression-Non-Negative-CV-iris-out.txt       | 8 --------
 ...LogisticRegression-Non-Negative-TrainTest-iris-out.txt | 4 ----
 .../LogisticRegression-Non-Negative-TrainTest-iris-rp.txt | 2 +-
 .../MulticlassLogisticRegression-CV-iris-out.txt          | 8 --------
 ...lassLogisticRegression-CV-iris-tree-featurized-out.txt | 8 --------
 ...ticRegression-CV-iris-tree-featurized-permuted-out.txt | 8 --------
 .../MulticlassLogisticRegression-TrainTest-iris-out.txt   | 4 ----
 ...isticRegression-TrainTest-iris-tree-featurized-out.txt | 4 ----
 ...ession-TrainTest-iris-tree-featurized-permuted-out.txt | 4 ----
 .../MulticlassLogisticRegression-CV-iris-out.txt          | 8 --------
 test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt        | 8 --------
 .../Common/OVA/OVA-FastForest-CV-iris-out.txt             | 8 --------
 .../Common/OVA/OVA-FastForest-TrainTest-iris-out.txt      | 4 ----
 test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt | 4 ----
 test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt      | 8 --------
 .../Common/PKPD/PKPD-TrainTest-iris-out.txt               | 4 ----
 .../WE-Average-TrainTest-iris-out.txt                     | 4 ----
 .../WE-Bootstrap-TrainTest-iris-out.txt                   | 4 ----
 .../WE-SDCA-Average-TrainTest-iris-out.txt                | 4 ----
 .../WE-Stacking-TrainTest-iris-out.txt                    | 4 ----
 .../WE-Voting-TrainTest-iris-out.txt                      | 4 ----
 .../netcoreapp31/WE-Average-TrainTest-iris-out.txt        | 4 ----
 .../netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt      | 4 ----
 .../netcoreapp31/WE-Stacking-TrainTest-iris-out.txt       | 4 ----
 .../netcoreapp31/WE-Voting-TrainTest-iris-out.txt         | 4 ----
 31 files changed, 1 insertion(+), 162 deletions(-)

diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
index f2af91addf..cfe6101e4a 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt
@@ -23,10 +23,6 @@ Accuracy(micro-avg): 0.936709
 Accuracy(macro-avg): 0.942857
 Log-loss:           0.285741
 Log-loss reduction: 0.737254
-Top K accuracy(All K)K=1: 0.936709
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -41,10 +37,6 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.160970
 Log-loss reduction: 0.851729
-Top K accuracy(All K)K=1: 0.957746
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
index b1a55bcb0d..0f8b13a0c5 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt
@@ -23,10 +23,6 @@ Accuracy(micro-avg): 0.936709
 Accuracy(macro-avg): 0.942857
 Log-loss:           0.285741
 Log-loss reduction: 0.737254
-Top K accuracy(All K)K=1: 0.936709
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -41,10 +37,6 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.160970
 Log-loss reduction: 0.851729
-Top K accuracy(All K)K=1: 0.957746
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
index 857003cbf3..880c8fa977 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt
@@ -18,10 +18,6 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.161048
 Log-loss reduction: 0.853408
-Top K accuracy(All K)K=1: 0.973333
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
index 6f242a10ae..7085992a17 100644
--- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
+++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt
@@ -18,10 +18,6 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.161048
 Log-loss reduction: 0.853408
-Top K accuracy(All K)K=1: 0.973333
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
index 301032b3ea..54e16b5640 100644
--- a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
+++ b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-CV-breast-cancer-out.txt
@@ -16,9 +16,6 @@ Accuracy(micro-avg): 0.629834
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -51.407404
-Top K accuracy(All K)K=1: 0.629834
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
 
 Confusion table
           ||======================
@@ -32,9 +29,6 @@ Accuracy(micro-avg): 0.682493
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -54.264136
-Top K accuracy(All K)K=1: 0.682493
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
index 53ca634eeb..eb0275e175 100644
--- a/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
+++ b/test/BaselineOutput/Common/MultiClassNaiveBayes/MultiClassNaiveBayes-TrainTest-breast-cancer-out.txt
@@ -14,9 +14,6 @@ Accuracy(micro-avg): 0.655222
 Accuracy(macro-avg): 0.500000
 Log-loss:           34.538776
 Log-loss reduction: -52.618809
-Top K accuracy(All K)K=1: 0.655222
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
index 9685541e34..c8e2526951 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-CV-iris-out.txt
@@ -25,10 +25,6 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.129858
 Log-loss reduction: 0.880592
-Top K accuracy(All K)K=1: 0.962025
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -43,10 +39,6 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.125563
 Log-loss reduction: 0.884343
-Top K accuracy(All K)K=1: 0.971831
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
index 93ec97e28c..58e1f89e5b 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-out.txt
@@ -19,10 +19,6 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.095534
 Log-loss reduction: 0.913041
-Top K accuracy(All K)K=1: 0.980000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
index e9489fa884..6e2c6ad943 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/LogisticRegression-Non-Negative-TrainTest-iris-rp.txt
@@ -1,4 +1,4 @@
 MulticlassLogisticRegression
 Accuracy(micro-avg)	Accuracy(macro-avg)	Log-loss	Log-loss reduction	/l2	/l1	/ot	/nt	/nn	Learner Name	Train Dataset	Test Dataset	Results File	Run Time	Physical Memory	Virtual Memory	Command Line	Settings	
-0.98	0.98	0.095534	0.913041	0.1	0.001	0.001	1	+	MulticlassLogisticRegression	%Data%	%Data%	%Output%	99	0	0	maml.exe TrainTest test=%Data% tr=MulticlassLogisticRegression{l1=0.001 l2=0.1 ot=1e-3 nt=1 nn=+} norm=No dout=%Output% data=%Data% out=%Output% seed=1 xf=Term{col=Label}	/l2:0.1;/l1:0.001;/ot:0.001;/nt:1;/nn:+	
+0.98	0.98	0.095534	0.913042	0.1	0.001	0.001	1	+	MulticlassLogisticRegression	%Data%	%Data%	%Output%	99	0	0	maml.exe TrainTest test=%Data% tr=MulticlassLogisticRegression{l1=0.001 l2=0.1 ot=1e-3 nt=1 nn=+} norm=No dout=%Output% data=%Data% out=%Output% seed=1 xf=Term{col=Label}	/l2:0.1;/l1:0.001;/ot:0.001;/nt:1;/nn:+	
 
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
index b766b0ba55..99b7a4a51c 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-out.txt
@@ -25,10 +25,6 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.101866
 Log-loss reduction: 0.906331
-Top K accuracy(All K)K=1: 0.962025
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -43,10 +39,6 @@ Accuracy(micro-avg): 0.985915
 Accuracy(macro-avg): 0.983333
 Log-loss:           0.075812
 Log-loss reduction: 0.930169
-Top K accuracy(All K)K=1: 0.985915
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
index 0533130117..3b5ff1562d 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-out.txt
@@ -39,10 +39,6 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.330649
 Log-loss reduction: 0.695959
-Top K accuracy(All K)K=1: 0.924051
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -57,10 +53,6 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.157832
 Log-loss reduction: 0.854620
-Top K accuracy(All K)K=1: 0.957746
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
index 4553c97f03..c11eacd20b 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-CV-iris-tree-featurized-permuted-out.txt
@@ -39,10 +39,6 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.201590
 Log-loss reduction: 0.814633
-Top K accuracy(All K)K=1: 0.924051
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -57,10 +53,6 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.968182
 Log-loss:           0.101915
 Log-loss reduction: 0.906125
-Top K accuracy(All K)K=1: 0.971831
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
index 4bd98f6839..248e2dc6c2 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-out.txt
@@ -19,10 +19,6 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.072218
 Log-loss reduction: 0.934264
-Top K accuracy(All K)K=1: 0.980000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
index 847816539e..16e312ae3a 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-out.txt
@@ -26,10 +26,6 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.048652
 Log-loss reduction: 0.955715
-Top K accuracy(All K)K=1: 0.980000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
index 511ca9fd0b..3c21d28d0b 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/MulticlassLogisticRegression-TrainTest-iris-tree-featurized-permuted-out.txt
@@ -26,10 +26,6 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.052580
 Log-loss reduction: 0.952140
-Top K accuracy(All K)K=1: 0.973333
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt b/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
index 04fb413c9e..6f8b64ff3c 100644
--- a/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/MulticlassLogisticRegression/netcoreapp31/MulticlassLogisticRegression-CV-iris-out.txt
@@ -25,10 +25,6 @@ Accuracy(micro-avg): 0.962025
 Accuracy(macro-avg): 0.965079
 Log-loss:           0.101866
 Log-loss reduction: 0.906331
-Top K accuracy(All K)K=1: 0.962025
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -43,10 +39,6 @@ Accuracy(micro-avg): 0.985915
 Accuracy(macro-avg): 0.983333
 Log-loss:           0.075796
 Log-loss reduction: 0.930183
-Top K accuracy(All K)K=1: 0.985915
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
index 6d61606b73..0469c9469e 100644
--- a/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-CV-iris-out.txt
@@ -29,10 +29,6 @@ Accuracy(micro-avg): 0.974684
 Accuracy(macro-avg): 0.977778
 Log-loss:           0.352944
 Log-loss reduction: 0.675458
-Top K accuracy(All K)K=1: 0.974684
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -47,10 +43,6 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.273754
 Log-loss reduction: 0.747843
-Top K accuracy(All K)K=1: 0.971831
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
index 6c673f917a..5100848835 100644
--- a/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-FastForest-CV-iris-out.txt
@@ -71,10 +71,6 @@ Accuracy(micro-avg): 0.924051
 Accuracy(macro-avg): 0.932540
 Log-loss:           0.197783
 Log-loss reduction: 0.818133
-Top K accuracy(All K)K=1: 0.924051
-Top K accuracy(All K)K=2: 0.987342
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -89,10 +85,6 @@ Accuracy(micro-avg): 0.957746
 Accuracy(macro-avg): 0.953030
 Log-loss:           0.103360
 Log-loss reduction: 0.904794
-Top K accuracy(All K)K=1: 0.957746
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
index 21c3b24263..e988ef026c 100644
--- a/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-FastForest-TrainTest-iris-out.txt
@@ -42,10 +42,6 @@ Accuracy(micro-avg): 0.973333
 Accuracy(macro-avg): 0.973333
 Log-loss:           0.088201
 Log-loss reduction: 0.919716
-Top K accuracy(All K)K=1: 0.973333
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt b/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
index f4e290013b..a78f6c8f27 100644
--- a/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/OVA/OVA-TrainTest-iris-out.txt
@@ -21,10 +21,6 @@ Accuracy(micro-avg): 0.960000
 Accuracy(macro-avg): 0.960000
 Log-loss:           0.254771
 Log-loss reduction: 0.768097
-Top K accuracy(All K)K=1: 0.960000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt b/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
index 849b1d8cfd..21e6eee190 100644
--- a/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
+++ b/test/BaselineOutput/Common/PKPD/PKPD-CV-iris-out.txt
@@ -41,10 +41,6 @@ Accuracy(micro-avg): 0.974684
 Accuracy(macro-avg): 0.977778
 Log-loss:           0.359335
 Log-loss reduction: 0.669582
-Top K accuracy(All K)K=1: 0.974684
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 Confusion table
           ||========================
@@ -59,10 +55,6 @@ Accuracy(micro-avg): 0.971831
 Accuracy(macro-avg): 0.966667
 Log-loss:           0.277823
 Log-loss reduction: 0.744095
-Top K accuracy(All K)K=1: 0.971831
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt b/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
index a5bca1aaa6..c509772806 100644
--- a/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/PKPD/PKPD-TrainTest-iris-out.txt
@@ -27,10 +27,6 @@ Accuracy(micro-avg): 0.960000
 Accuracy(macro-avg): 0.960000
 Log-loss:           0.255665
 Log-loss reduction: 0.767284
-Top K accuracy(All K)K=1: 0.960000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
index 888067df15..5e299a5d5c 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Average-TrainTest-iris-out.txt
@@ -46,10 +46,6 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.433374
 Log-loss reduction: 0.605526
-Top K accuracy(All K)K=1: 0.946667
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
index eb3d695b74..1446808e2b 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Bootstrap-TrainTest-iris-out.txt
@@ -136,10 +136,6 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.434962
 Log-loss reduction: 0.604081
-Top K accuracy(All K)K=1: 0.946667
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
index 86e84cf3b6..a6c93db8f3 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-SDCA-Average-TrainTest-iris-out.txt
@@ -56,10 +56,6 @@ Accuracy(micro-avg): 0.980000
 Accuracy(macro-avg): 0.980000
 Log-loss:           0.061647
 Log-loss reduction: 0.943887
-Top K accuracy(All K)K=1: 0.980000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
index 7761b303dc..99e556e559 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Stacking-TrainTest-iris-out.txt
@@ -52,10 +52,6 @@ Accuracy(micro-avg): 0.900000
 Accuracy(macro-avg): 0.900000
 Log-loss:           0.431088
 Log-loss reduction: 0.607607
-Top K accuracy(All K)K=1: 0.900000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
index 068f262ef8..94766eed5c 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/WE-Voting-TrainTest-iris-out.txt
@@ -46,10 +46,6 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.511576
 Log-loss reduction: 0.534344
-Top K accuracy(All K)K=1: 0.946667
-Top K accuracy(All K)K=2: 0.986667
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
index 25e12386b9..f0bf9e77bf 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Average-TrainTest-iris-out.txt
@@ -46,10 +46,6 @@ Accuracy(micro-avg): 0.940000
 Accuracy(macro-avg): 0.940000
 Log-loss:           0.433907
 Log-loss reduction: 0.605040
-Top K accuracy(All K)K=1: 0.940000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
index 97f32d2c65..2da2bb56f1 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Bootstrap-TrainTest-iris-out.txt
@@ -136,10 +136,6 @@ Accuracy(micro-avg): 0.940000
 Accuracy(macro-avg): 0.940000
 Log-loss:           0.435480
 Log-loss reduction: 0.603609
-Top K accuracy(All K)K=1: 0.940000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
index e0678dcaa0..3d92491bd9 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Stacking-TrainTest-iris-out.txt
@@ -52,10 +52,6 @@ Accuracy(micro-avg): 0.900000
 Accuracy(macro-avg): 0.900000
 Log-loss:           0.431192
 Log-loss reduction: 0.607512
-Top K accuracy(All K)K=1: 0.900000
-Top K accuracy(All K)K=2: 1.000000
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------
diff --git a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
index f4882c6eed..cec0c9958b 100644
--- a/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
+++ b/test/BaselineOutput/Common/WeightedEnsembleMulticlass/netcoreapp31/WE-Voting-TrainTest-iris-out.txt
@@ -46,10 +46,6 @@ Accuracy(micro-avg): 0.946667
 Accuracy(macro-avg): 0.946667
 Log-loss:           0.511576
 Log-loss reduction: 0.534344
-Top K accuracy(All K)K=1: 0.946667
-Top K accuracy(All K)K=2: 0.986667
-Top K accuracy(All K)K=3: 1.000000
-Top K accuracy(All K)K=4: 1.000000
 
 OVERALL RESULTS
 ---------------------------------------

From bfcda2257064c7c469a0393eb2a927a9add07999 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 16:33:01 -0500
Subject: [PATCH 14/28] fix test fails, re-add names parameter

---
 .../Evaluators/Metrics/MulticlassClassificationMetrics.cs   | 6 +++---
 .../Evaluators/MulticlassClassificationEvaluator.cs         | 2 ++
 test/Microsoft.ML.AutoML.Tests/MetricsAgentsTests.cs        | 6 +++---
 test/Microsoft.ML.AutoML.Tests/MetricsUtil.cs               | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index f06f943027..c8f46b2f1a 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -121,11 +121,11 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
             MacroAccuracy = FetchDouble(MulticlassClassificationEvaluator.AccuracyMacro);
             LogLoss = FetchDouble(MulticlassClassificationEvaluator.LogLoss);
             LogLossReduction = FetchDouble(MulticlassClassificationEvaluator.LogLossReduction);
-            if (TopKPredictionCount > 0)
-                TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
-
             TopKPredictionCount = topKPredictionCount;
 
+            if (topKPredictionCount > 0)
+                TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
+
             var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
             PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
             ConfusionMatrix = MetricWriter.GetConfusionMatrix(host, confusionMatrix, binary: false, perClassLogLoss.Length);
diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index f080329cd7..647c787565 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -61,6 +61,7 @@ public enum Metrics
         internal const string LoadName = "MultiClassClassifierEvaluator";
 
         private readonly int? _outputTopKAcc;
+        private readonly bool _names;
 
         public MulticlassClassificationEvaluator(IHostEnvironment env, Arguments args)
             : base(env, LoadName)
@@ -68,6 +69,7 @@ public MulticlassClassificationEvaluator(IHostEnvironment env, Arguments args)
             Host.AssertValue(args, "args");
             Host.CheckUserArg(args.OutputTopKAcc == null || args.OutputTopKAcc > 0, nameof(args.OutputTopKAcc));
             _outputTopKAcc = args.OutputTopKAcc;
+            _names = args.Names;
         }
 
         private protected override void CheckScoreAndLabelTypes(RoleMappedSchema schema)
diff --git a/test/Microsoft.ML.AutoML.Tests/MetricsAgentsTests.cs b/test/Microsoft.ML.AutoML.Tests/MetricsAgentsTests.cs
index 0f9b336d84..72b8d33bee 100644
--- a/test/Microsoft.ML.AutoML.Tests/MetricsAgentsTests.cs
+++ b/test/Microsoft.ML.AutoML.Tests/MetricsAgentsTests.cs
@@ -61,7 +61,7 @@ public void BinaryMetricsPerfectTest()
         [Fact]
         public void MulticlassMetricsGetScoreTest()
         {
-            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(0.1, 0.2, 0.3, 0.4, 0, 0.5, new double[] {});
+            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(0.1, 0.2, 0.3, 0.4, 0, new double[] {0.5}, new double[] {});
             Assert.Equal(0.1, GetScore(metrics, MulticlassClassificationMetric.MicroAccuracy));
             Assert.Equal(0.2, GetScore(metrics, MulticlassClassificationMetric.MacroAccuracy));
             Assert.Equal(0.3, GetScore(metrics, MulticlassClassificationMetric.LogLoss));
@@ -72,7 +72,7 @@ public void MulticlassMetricsGetScoreTest()
         [Fact]
         public void MulticlassMetricsNonPerfectTest()
         {
-            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(0.1, 0.2, 0.3, 0.4, 0, 0.5, new double[] { });
+            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(0.1, 0.2, 0.3, 0.4, 0, new double[] { 0.5 }, new double[] { });
             Assert.False(IsPerfectModel(metrics, MulticlassClassificationMetric.MacroAccuracy));
             Assert.False(IsPerfectModel(metrics, MulticlassClassificationMetric.MicroAccuracy));
             Assert.False(IsPerfectModel(metrics, MulticlassClassificationMetric.LogLoss));
@@ -83,7 +83,7 @@ public void MulticlassMetricsNonPerfectTest()
         [Fact]
         public void MulticlassMetricsPerfectTest()
         {
-            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(1, 1, 0, 1, 0, 1, new double[] { });
+            var metrics = MetricsUtil.CreateMulticlassClassificationMetrics(1, 1, 0, 1, 0, new double[] { 1 }, new double[] { });
             Assert.True(IsPerfectModel(metrics, MulticlassClassificationMetric.MicroAccuracy));
             Assert.True(IsPerfectModel(metrics, MulticlassClassificationMetric.MacroAccuracy));
             Assert.True(IsPerfectModel(metrics, MulticlassClassificationMetric.LogLoss));
diff --git a/test/Microsoft.ML.AutoML.Tests/MetricsUtil.cs b/test/Microsoft.ML.AutoML.Tests/MetricsUtil.cs
index 828eccf9d2..3f306fdcbe 100644
--- a/test/Microsoft.ML.AutoML.Tests/MetricsUtil.cs
+++ b/test/Microsoft.ML.AutoML.Tests/MetricsUtil.cs
@@ -21,7 +21,7 @@ public static BinaryClassificationMetrics CreateBinaryClassificationMetrics(
 
         public static MulticlassClassificationMetrics CreateMulticlassClassificationMetrics(
             double accuracyMicro, double accuracyMacro, double logLoss,
-            double logLossReduction, int topK, double topKAccuracy,
+            double logLossReduction, int topK, double[] topKAccuracy,
             double[] perClassLogLoss)
         {
             return CreateInstance<MulticlassClassificationMetrics>(accuracyMicro,

From 563768c68a21040feeced8de94f0fd3fc3aa9a1a Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 17:34:26 -0500
Subject: [PATCH 15/28] Clean up commented code

---
 .../Evaluators/MulticlassClassificationEvaluator.cs            | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 647c787565..eecbda14e2 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -494,8 +494,7 @@ public override void ProcessRow()
                 // The rank will be from 0 to N. (Not N-1). Rank N is used for unrecognized values.
                 // Situation: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
                 // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
-                int rankofCorrectLabel = 0;
-                //float highestProb = 0;
+                int rankofCorrectLabel = 0;                
                 int assigned = -1;
                 for (int i=0; i < _scoresArr.Length; i++)
                 {

From 4a5597aa55557820e1f45306c251379611970813 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Fri, 6 Nov 2020 17:45:08 -0500
Subject: [PATCH 16/28] Remove trailing whitespace left by editing from the GitHub webpage

---
 .../Evaluators/MulticlassClassificationEvaluator.cs             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index eecbda14e2..837b31e37d 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -494,7 +494,7 @@ public override void ProcessRow()
                 // The rank will be from 0 to N. (Not N-1). Rank N is used for unrecognized values.
                 // Situation: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
                 // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
-                int rankofCorrectLabel = 0;                
+                int rankofCorrectLabel = 0;
                 int assigned = -1;
                 for (int i=0; i < _scoresArr.Length; i++)
                 {

From 71390bdadaf9f80b7e43b2cf1d64d24c311474a7 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 19 Nov 2020 16:48:35 -0500
Subject: [PATCH 17/28] use existing method, fix nits

---
 .../Experiment/Runners/CrossValSummaryRunner.cs               | 4 +---
 .../Evaluators/Metrics/MulticlassClassificationMetrics.cs     | 2 +-
 .../Evaluators/MulticlassClassificationEvaluator.cs           | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
index fc911f52f6..873a64067f 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
@@ -124,8 +124,7 @@ private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetric
                     logLoss: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLoss)),
                     logLossReduction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLossReduction)),
                     topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
-                    //need to "transpose"/rotate this array of arrays so we can average across all top 0, top 1, top 2, etc
-                    topKAccuracies: newMetrics.SelectMany(nm => nm.TopKAccuracyForAllK.Select((double tk, int i) => (i, tk))).ToLookup(itk => itk.i, itk => itk.tk).Select(tk => GetAverageOfNonNaNScores(tk)).ToArray(),
+                    topKAccuracies: GetAverageOfNonNaNScoresInNestedEnumerable(newMetrics.Select(x => x.TopKAccuracyForAllK)),
                     perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
                     confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix) ;
                 return result as TMetrics;
@@ -165,7 +164,6 @@ private static double[] GetAverageOfNonNaNScoresInNestedEnumerable(IEnumerable<I
             double[] arr = new double[results.ElementAt(0).Count()];
             for (int i = 0; i < arr.Length; i++)
             {
-                Contracts.Assert(arr.Length == results.ElementAt(i).Count());
                 arr[i] = GetAverageOfNonNaNScores(results.Select(x => x.ElementAt(i)));
             }
             return arr;
diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index c8f46b2f1a..0e2db6f75b 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -79,7 +79,7 @@ public sealed class MulticlassClassificationMetrics
         public double TopKAccuracy => TopKAccuracyForAllK?.LastOrDefault() ?? 0;
 
         /// <summary>
-        /// If positive, this indicates the K in <see cref="TopKAccuracy"/> and <see cref="TopKAccuracy"/>.
+        /// If positive, this indicates the K in <see cref="TopKAccuracy"/> and <see cref="TopKAccuracyForAllK"/>.
         /// </summary>
         public int TopKPredictionCount { get; }
 
diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 837b31e37d..ea3fecdb69 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -495,14 +495,14 @@ public override void ProcessRow()
                 // Situation: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
                 // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
                 int rankofCorrectLabel = 0;
-                int assigned = -1;
+                int assigned = 0;
                 for (int i=0; i < _scoresArr.Length; i++)
                 {
                     if ( _scoresArr[i] > correctProba || (_scoresArr[i] == correctProba && i < intLabel))
                         rankofCorrectLabel++;
 
                     //This is the assigned "prediction" of the model if it has the highest probability.
-                    if (assigned < 0 || _scoresArr[assigned] < _scoresArr[i] )
+                    if ( _scoresArr[assigned] < _scoresArr[i] )
                         assigned = i;
                 }
 

From 32ab9faa4cfdbd9c9a7cc0d350dca57564ca4408 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Thu, 19 Nov 2020 19:48:53 -0500
Subject: [PATCH 18/28] Slight comment change

---
 .../Evaluators/Metrics/MulticlassClassificationMetrics.cs       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index 0e2db6f75b..205f5237e4 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -74,7 +74,7 @@ public sealed class MulticlassClassificationMetrics
 
         /// <summary>
         /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive, this is the relative number of examples where
-        /// the true label is one of the top-k predicted labels by the predictor.
+        /// the true label is one of the top K predicted labels by the predictor.
         /// </summary>
         public double TopKAccuracy => TopKAccuracyForAllK?.LastOrDefault() ?? 0;
 

From db2b6b537b2da7e150cc499cd234c70ba6dc1705 Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Sat, 21 Nov 2020 10:11:50 -0500
Subject: [PATCH 19/28] Comment change / Touch to kick off build pipeline

---
 .../Evaluators/Metrics/MulticlassClassificationMetrics.cs      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index 205f5237e4..1530cae355 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -73,7 +73,8 @@ public sealed class MulticlassClassificationMetrics
         public double MicroAccuracy { get; }
 
         /// <summary>
-        /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive, this is the relative number of examples where
+        /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive, 
+        /// this is the relative number of examples where
         /// the true label is one of the top K predicted labels by the predictor.
         /// </summary>
         public double TopKAccuracy => TopKAccuracyForAllK?.LastOrDefault() ?? 0;

From 0d0493ba792db9cb7ba4ba4c74980882ccad002d Mon Sep 17 00:00:00 2001
From: Jason DeBoever <github@deboever.us>
Date: Mon, 23 Nov 2020 14:22:20 -0500
Subject: [PATCH 20/28] fix whitespace

---
 .../Evaluators/Metrics/MulticlassClassificationMetrics.cs       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
index 1530cae355..c6d2495506 100644
--- a/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
+++ b/src/Microsoft.ML.Data/Evaluators/Metrics/MulticlassClassificationMetrics.cs
@@ -73,7 +73,7 @@ public sealed class MulticlassClassificationMetrics
         public double MicroAccuracy { get; }
 
         /// <summary>
-        /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive, 
+        /// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive,
         /// this is the relative number of examples where
         /// the true label is one of the top K predicted labels by the predictor.
         /// </summary>

From 05e7f916aae1acc2b4212eb75e482590710e211e Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Thu, 3 Dec 2020 16:55:12 -0800
Subject: [PATCH 21/28] Added new test

---
 test/Microsoft.ML.Tests/EvaluateTests.cs | 54 ++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 test/Microsoft.ML.Tests/EvaluateTests.cs

diff --git a/test/Microsoft.ML.Tests/EvaluateTests.cs b/test/Microsoft.ML.Tests/EvaluateTests.cs
new file mode 100644
index 0000000000..8bf020fe7f
--- /dev/null
+++ b/test/Microsoft.ML.Tests/EvaluateTests.cs
@@ -0,0 +1,54 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+using Microsoft.ML.TestFramework;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Microsoft.ML.Tests
+{
+    public class EvaluateTests : BaseTestClass
+    {
+        public EvaluateTests(ITestOutputHelper output)
+            : base(output)
+        {
+        }
+
+        public class MulticlassEvaluatorInput
+        {
+            [KeyType(4)]
+            public uint Label { get; set; }
+
+            [VectorType(4)]
+            public float[] Score { get; set; }
+
+            [KeyType(4)]
+            public uint PredictedLabel { get; set; }
+        }
+
+        [Fact]
+        public void MulticlassEvaluatorTopKArray()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Notice that the probability assigned to the correct label (i.e. Score[0])
+            // decreases on each row so as to get the expected TopK accuracy array hardcoded below.
+            var inputArray = new[]
+            {
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.4f, 0.3f, 0.2f, 0.1f }, PredictedLabel=0},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.3f, 0.4f, 0.2f, 0.1f }, PredictedLabel=1},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.2f, 0.3f, 0.4f, 0.1f }, PredictedLabel=2},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.1f, 0.3f, 0.2f, 0.4f }, PredictedLabel=3}
+            };
+
+            var expectedTopKArray = new[] { 0.25d, 0.5d, 0.75d, 1.0d };
+
+            var inputDV = mlContext.Data.LoadFromEnumerable(inputArray);
+            var metrics = mlContext.MulticlassClassification.Evaluate(inputDV, topKPredictionCount: 4);
+
+            Assert.Equal(expectedTopKArray, metrics.TopKAccuracyForAllK);
+        }
+    }
+}

From 49786ed56742d2ec57912974b5ce9cc94e2a8060 Mon Sep 17 00:00:00 2001
From: Justin Ormont <justinormont@users.noreply.github.com>
Date: Tue, 8 Dec 2020 04:47:30 -0800
Subject: [PATCH 22/28] Code formatting nits

---
 .../Runners/CrossValSummaryRunner.cs          |  3 +--
 .../MulticlassClassificationEvaluator.cs      | 24 ++++++++++++-------
 test/Microsoft.ML.Tests/EvaluateTests.cs      |  8 +++----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
index 540398ff2d..e0862a1fc0 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
@@ -126,9 +126,8 @@ private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetric
                     topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
                     topKAccuracies: GetAverageOfNonNaNScoresInNestedEnumerable(newMetrics.Select(x => x.TopKAccuracyForAllK)),
                     perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
-                    confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix) ;
+                    confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix);
                 return result as TMetrics;
-
             }
 
             if (typeof(TMetrics) == typeof(RegressionMetrics))
diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index ea3fecdb69..dd49ed25d2 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -309,7 +309,7 @@ public double Reduction
                     }
                 }
 
-                public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value-1] : 0d;
+                public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value - 1] : 0d;
                 public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
@@ -490,19 +490,25 @@ public override void ProcessRow()
                 // Get the probability that the CORRECT label has: (best case is that it's the highest probability):
                 var correctProba = !wasKnownLabel ? 0 : _scoresArr[intLabel];
 
-                // Find the rank of the *correct* label (in _scoresArr[]). If the correct (ground truth) labels gets rank 0, it means the model assigned it the highest probability (that's ideal). Rank 1 would mean our model gives the real label the 2nd highest probabality, etc.
+                // Find the rank of the *correct* label (in _scoresArr[]). If the correct (ground truth) labels gets rank 0,
+                // it means the model assigned it the highest probability (that's ideal). Rank 1 would mean our model 
+                // gives the real label the 2nd highest probabality, etc.
                 // The rank will be from 0 to N. (Not N-1). Rank N is used for unrecognized values.
-                // Situation: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, c:0.1, d:0.6, e:0.1 where c is the correct label).
-                // This actually happens a lot with some models. We handle ties by assigning rank in order of first appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b are sequentially first.
-                int rankofCorrectLabel = 0;
+                //
+                // Tie breaking: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, 
+                // c:0.1, d:0.6, e:0.1 where c is the correct label).
+                // This actually happens a lot with some models. We handle ties by assigning rank in order of first
+                // appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b
+                //  are sequentially first.
+                int rankOfCorrectLabel = 0;
                 int assigned = 0;
                 for (int i=0; i < _scoresArr.Length; i++)
                 {
-                    if ( _scoresArr[i] > correctProba || (_scoresArr[i] == correctProba && i < intLabel))
-                        rankofCorrectLabel++;
+                    if (_scoresArr[i] > correctProba || (_scoresArr[i] == correctProba && i < intLabel))
+                        rankOfCorrectLabel++;
 
-                    //This is the assigned "prediction" of the model if it has the highest probability.
-                    if ( _scoresArr[assigned] < _scoresArr[i] )
+                    // This is the assigned "prediction" of the model if it has the highest probability.
+                    if (_scoresArr[assigned] < _scoresArr[i])
                         assigned = i;
                 }
 
diff --git a/test/Microsoft.ML.Tests/EvaluateTests.cs b/test/Microsoft.ML.Tests/EvaluateTests.cs
index 8bf020fe7f..cfaea5460e 100644
--- a/test/Microsoft.ML.Tests/EvaluateTests.cs
+++ b/test/Microsoft.ML.Tests/EvaluateTests.cs
@@ -37,10 +37,10 @@ public void MulticlassEvaluatorTopKArray()
             // decreases on each row so as to get the expected TopK accuracy array hardcoded below.
             var inputArray = new[]
             {
-                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.4f, 0.3f, 0.2f, 0.1f }, PredictedLabel=0},
-                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.3f, 0.4f, 0.2f, 0.1f }, PredictedLabel=1},
-                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.2f, 0.3f, 0.4f, 0.1f }, PredictedLabel=2},
-                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.1f, 0.3f, 0.2f, 0.4f }, PredictedLabel=3}
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.4f, 0.3f, 0.2f, 0.1f}, PredictedLabel = 0},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.3f, 0.4f, 0.2f, 0.1f}, PredictedLabel = 1},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.2f, 0.3f, 0.4f, 0.1f}, PredictedLabel = 2},
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.1f, 0.3f, 0.2f, 0.4f}, PredictedLabel = 3}
             };
 
             var expectedTopKArray = new[] { 0.25d, 0.5d, 0.75d, 1.0d };

From 9259031f81b2a6e7507c5a0eef770e26f4163c04 Mon Sep 17 00:00:00 2001
From: Justin Ormont <justinormont@users.noreply.github.com>
Date: Tue, 8 Dec 2020 05:15:53 -0800
Subject: [PATCH 23/28] Code formatting nit

---
 src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
index bf9d9a95c8..96ee121400 100644
--- a/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
+++ b/src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
@@ -1037,7 +1037,7 @@ private static List<string> GetMetricNames(IChannel ch, DataViewSchema schema, D
                     foreach (var name in names.Items(all: true))
                     {
                         var tryNaming = string.Format(metricName, name.Value);
-                        if (tryNaming == metricName) //metricName wasn't a format string, so just append slotname
+                        if (tryNaming == metricName) // metricName wasn't a format string, so just append slotname
                             tryNaming = (string.Format("{0}{1}", metricName, name.Value));
 
                         metricNames.Add(tryNaming);

From 98458ba3b933abcbb820b2064ce2286653dceea5 Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Tue, 8 Dec 2020 11:34:47 -0800
Subject: [PATCH 24/28] Fixed undefined rankofCorrectLabel and trailing
 whitespace warning

---
 .../Evaluators/MulticlassClassificationEvaluator.cs       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index dd49ed25d2..b2a6ae92fd 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -491,11 +491,11 @@ public override void ProcessRow()
                 var correctProba = !wasKnownLabel ? 0 : _scoresArr[intLabel];
 
                 // Find the rank of the *correct* label (in _scoresArr[]). If the correct (ground truth) labels gets rank 0,
-                // it means the model assigned it the highest probability (that's ideal). Rank 1 would mean our model 
+                // it means the model assigned it the highest probability (that's ideal). Rank 1 would mean our model
                 // gives the real label the 2nd highest probabality, etc.
                 // The rank will be from 0 to N. (Not N-1). Rank N is used for unrecognized values.
                 //
-                // Tie breaking: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1, 
+                // Tie breaking: What if we have probabilities that are equal to the correct prediction (eg, a:0.1, b:0.1,
                 // c:0.1, d:0.6, e:0.1 where c is the correct label).
                 // This actually happens a lot with some models. We handle ties by assigning rank in order of first
                 // appearance. In this example, we assign c the rank of 3, because d has a higher probability and a and b
@@ -512,9 +512,9 @@ public override void ProcessRow()
                         assigned = i;
                 }
 
-                UnweightedCounters.Update(rankofCorrectLabel, assigned, logloss, intLabel, 1);
+                UnweightedCounters.Update(rankOfCorrectLabel, assigned, logloss, intLabel, 1);
                 if (WeightedCounters != null)
-                    WeightedCounters.Update(rankofCorrectLabel, assigned, logloss, intLabel, weight);
+                    WeightedCounters.Update(rankOfCorrectLabel, assigned, logloss, intLabel, weight);
             }
 
             protected override List<string> GetWarningsCore()

From 86f5c3f5a63e66f0ce38036580ee1bbcc5bd349e Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Tue, 8 Dec 2020 14:35:03 -0800
Subject: [PATCH 25/28] Removed _numUnknownClassInstances and added test for
 unknown labels

---
 .../MulticlassClassificationEvaluator.cs      |  7 +-----
 test/Microsoft.ML.Tests/EvaluateTests.cs      | 25 ++++++++++++++-----
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index b2a6ae92fd..dc9508aae1 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -262,7 +262,6 @@ public sealed class Counters
                 private double _totalLogLoss;
                 private double _numInstances;
                 private double _numCorrect;
-                private int _numUnknownClassInstances;
                 private readonly double[] _sumWeightsOfClass;
                 private readonly double[] _totalPerClassLogLoss;
                 private readonly long[] _seenRanks;
@@ -310,7 +309,7 @@ public double Reduction
                 }
 
                 public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value - 1] : 0d;
-                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();
+                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances))).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
                 // in each class by the total weight of examples in that class.
@@ -362,10 +361,6 @@ public void Update(int seenRank, int assigned, double loglossCurr, int label, fl
                     {
                         ConfusionTable[label][assigned] += weight;
                     }
-                    else
-                    {
-                        _numUnknownClassInstances++;
-                    }
                 }
 
                 private static IEnumerable<double> CumulativeSum(IEnumerable<double> s)
diff --git a/test/Microsoft.ML.Tests/EvaluateTests.cs b/test/Microsoft.ML.Tests/EvaluateTests.cs
index cfaea5460e..ee5c58016f 100644
--- a/test/Microsoft.ML.Tests/EvaluateTests.cs
+++ b/test/Microsoft.ML.Tests/EvaluateTests.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Linq;
 using Microsoft.ML.Data;
 using Microsoft.ML.TestFramework;
 using Xunit;
@@ -18,14 +19,12 @@ public EvaluateTests(ITestOutputHelper output)
 
         public class MulticlassEvaluatorInput
         {
-            [KeyType(4)]
-            public uint Label { get; set; }
+            public float Label { get; set; }
 
             [VectorType(4)]
             public float[] Score { get; set; }
 
-            [KeyType(4)]
-            public uint PredictedLabel { get; set; }
+            public float PredictedLabel { get; set; }
         }
 
         [Fact]
@@ -40,15 +39,29 @@ public void MulticlassEvaluatorTopKArray()
                 new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.4f, 0.3f, 0.2f, 0.1f}, PredictedLabel = 0},
                 new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.3f, 0.4f, 0.2f, 0.1f}, PredictedLabel = 1},
                 new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.2f, 0.3f, 0.4f, 0.1f}, PredictedLabel = 2},
-                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.1f, 0.3f, 0.2f, 0.4f}, PredictedLabel = 3}
+                new MulticlassEvaluatorInput{Label = 0, Score = new[] {0.1f, 0.3f, 0.2f, 0.4f}, PredictedLabel = 3},
             };
 
             var expectedTopKArray = new[] { 0.25d, 0.5d, 0.75d, 1.0d };
 
             var inputDV = mlContext.Data.LoadFromEnumerable(inputArray);
             var metrics = mlContext.MulticlassClassification.Evaluate(inputDV, topKPredictionCount: 4);
-
             Assert.Equal(expectedTopKArray, metrics.TopKAccuracyForAllK);
+
+
+            // After introducing a sample whose label was unseen (i.e. the Score array doesn't assign it a probability)
+            // then the Top K array changes, as its values are divided by the total number of instances
+            // that were evaluated.
+            var inputArray2 = inputArray.AppendElement(new MulticlassEvaluatorInput {
+                Label = 5, Score = new[] { 0.1f, 0.3f, 0.2f, 0.4f }, PredictedLabel = 3 });
+            
+            var expectedTopKArray2 = new[] { 0.2d, 0.4d, 0.6d, 0.8d };
+
+            var inputDV2 = mlContext.Data.LoadFromEnumerable(inputArray2);
+            var metrics2 = mlContext.MulticlassClassification.Evaluate(inputDV2, topKPredictionCount: 4);
+            var outpu2 = metrics2.TopKAccuracyForAllK.ToArray();
+            for (int i = 0; i < expectedTopKArray2.Length; i++)
+                Assert.Equal(expectedTopKArray2[i], outpu2[i], precision: 7);
         }
     }
 }

From 741e9fbefcfa479db2bb92b0a775baaa6d08c473 Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Tue, 8 Dec 2020 14:43:39 -0800
Subject: [PATCH 26/28] Add weight to seenRanks

---
 .../Evaluators/MulticlassClassificationEvaluator.cs         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index dc9508aae1..1b479e3f5f 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -264,7 +264,7 @@ public sealed class Counters
                 private double _numCorrect;
                 private readonly double[] _sumWeightsOfClass;
                 private readonly double[] _totalPerClassLogLoss;
-                private readonly long[] _seenRanks;
+                private readonly double[] _seenRanks;
 
                 public readonly double[][] ConfusionTable;
 
@@ -335,7 +335,7 @@ public Counters(int numClasses, int? outputTopKAcc)
                     for (int i = 0; i < ConfusionTable.Length; i++)
                         ConfusionTable[i] = new double[numClasses];
 
-                    _seenRanks = new long[numClasses + 1];
+                    _seenRanks = new double[numClasses + 1];
                 }
 
                 public void Update(int seenRank, int assigned, double loglossCurr, int label, float weight)
@@ -350,7 +350,7 @@ public void Update(int seenRank, int assigned, double loglossCurr, int label, fl
                     if (label < _numClasses)
                         _totalPerClassLogLoss[label] += loglossCurr * weight;
 
-                    _seenRanks[seenRank]++;
+                    _seenRanks[seenRank]+=weight;
 
                     if (seenRank == 0) //prediction matched label
                     {

From dadf7931fd16dab744a9ae0260401cd20240e5a9 Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Tue, 8 Dec 2020 16:06:46 -0800
Subject: [PATCH 27/28] Nits

---
 .../Evaluators/MulticlassClassificationEvaluator.cs         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
index 1b479e3f5f..f084867b96 100644
--- a/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
+++ b/src/Microsoft.ML.Data/Evaluators/MulticlassClassificationEvaluator.cs
@@ -309,7 +309,7 @@ public double Reduction
                 }
 
                 public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value - 1] : 0d;
-                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / (double)(_numInstances))).ToArray();
+                public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Take(OutputTopKAcc ?? 0).Select(l => l / _numInstances)).ToArray();
 
                 // The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
                 // in each class by the total weight of examples in that class.
@@ -350,9 +350,9 @@ public void Update(int seenRank, int assigned, double loglossCurr, int label, fl
                     if (label < _numClasses)
                         _totalPerClassLogLoss[label] += loglossCurr * weight;
 
-                    _seenRanks[seenRank]+=weight;
+                    _seenRanks[seenRank] += weight;
 
-                    if (seenRank == 0) //prediction matched label
+                    if (seenRank == 0) // Prediction matched label
                     {
                         _numCorrect += weight;
                         ConfusionTable[label][label] += weight;

From 9e67751410b06838f5bf76b747e6c52e8c9109b5 Mon Sep 17 00:00:00 2001
From: Antonio Velazquez <anvelazq@microsoft.com>
Date: Tue, 8 Dec 2020 16:11:53 -0800
Subject: [PATCH 28/28] Removed FastTree import

---
 .../Experiment/Runners/CrossValSummaryRunner.cs                  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
index e0862a1fc0..0079be3ade 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs
@@ -8,7 +8,6 @@
 using System.Linq;
 using Microsoft.ML.Data;
 using Microsoft.ML.Runtime;
-using Microsoft.ML.Trainers.FastTree;
 
 namespace Microsoft.ML.AutoML
 {