From bf9d946227c2180b63661a417c15c6cfc909bd14 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Thu, 7 Feb 2019 16:00:25 -0800
Subject: [PATCH 01/14] Updated docs for AveragedPerceptron

---
 .../Standard/Online/AveragedLinear.cs         | 35 +++++++++++++++++
 .../Standard/Online/AveragedPerceptron.cs     | 19 ++++++---
 .../Standard/Online/OnlineLinear.cs           | 16 ++++++++
 .../Standard/Online/doc.xml                   | 39 -------------------
 .../StandardLearnersCatalog.cs                | 36 +++++++++++++----
 5 files changed, 93 insertions(+), 52 deletions(-)
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index bf7a88d27d..0a73a0ac3b 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -18,36 +18,71 @@ namespace Microsoft.ML.Trainers.Online
 {
     public abstract class AveragedLinearArguments : OnlineLinearArguments
     {
+        /// <summary>
+        /// <a href="tmpurl_lr">Learning rate</a>
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)]
         [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")]
         [TlcModule.SweepableDiscreteParam("LearningRate", new object[] { 0.01, 0.1, 0.5, 1.0 })]
         public float LearningRate = AveragedDefaultArgs.LearningRate;
 
+        /// <summary>
+        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)]
         [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")]
         [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })]
         public bool DecreaseLearningRate = AveragedDefaultArgs.DecreaseLearningRate;
 
+        /// <summary>
+        /// Number of examples after which weights will be reset to the current average.
+        /// Default is <see langword="null" />, which disables this feature.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")]
         public long? ResetWeightsAfterXExamples = null;
 
+        /// <summary>
+        /// <see langword="true" /> to update averaged weights only when loss is nonzero.
+        /// <see langword="false" /> to update averaged weights on every example.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")]
         public bool DoLazyUpdates = true;
 
+        /// <summary>
+        /// L2 weight for <a href='tmpurl_regularization'>regularization</a>.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)]
         [TGUI(Label = "L2 Regularization Weight")]
         [TlcModule.SweepableFloatParam("L2RegularizerWeight", 0.0f, 0.4f)]
         public float L2RegularizerWeight = AveragedDefaultArgs.L2RegularizerWeight;
 
+        /// <summary>
+        /// Extra weight given to more recent updates.
+        /// Default is 0, i.e. no extra gain.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")]
         public float RecencyGain = 0;
 
+        /// <summary>
+        /// <see langword="true" /> means <see cref="RecencyGain"/> is multiplicative.
+        /// <see langword="false" /> means <see cref="RecencyGain"/> is additive.
+        /// Default is <see langword="false" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")]
         public bool RecencyGainMulti = false;
 
+        /// <summary>
+        /// <see langword="true" /> to do averaging; otherwise, <see langword="false" />.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")]
         public bool Averaged = true;
 
+        /// <summary>
+        /// The inexactness tolerance for averaging.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")]
         public float AveragedTolerance = (float)1e-2;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index b3659974fa..2e82f8af5e 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -25,12 +25,10 @@
 
 namespace Microsoft.ML.Trainers.Online
 {
-    // This is an averaged perceptron classifier.
-    // Configurable subcomponents:
-    //     - Loss function. By default, hinge loss (aka max-margin avgd perceptron)
-    //     - Feature normalization. By default, rescaling between min and max values for every feature
-    //     - Prediction calibration to produce probabilities. Off by default, if on, uses exponential (aka Platt) calibration.
-    /// <include file='doc.xml' path='doc/members/member[@name="AP"]/*' />
+    /// <summary>
+    /// This is the averaged perceptron trainer.
+    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+    /// </summary>
     public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
     {
         public const string LoadNameValue = "AveragedPerceptron";
@@ -42,12 +40,21 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
 
         public sealed class Options : AveragedLinearArguments
         {
+            /// <summary>
+            /// The custom <a href="tmpurl_loss">loss</a>. Default is hinge loss.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments();
 
+            /// <summary>
+            /// The <a href="tmpurl_calib">calibrator</a> for producing probabilities. Default is exponential (aka Platt) calibration.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
             public ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory();
 
+            /// <summary>
+            /// The maximum number of examples to use when training the calibrator.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of examples to use when training the calibrator", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
             public int MaxCalibrationExamples = 1000000;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index 2128c8e008..4e1cb6e341 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -20,24 +20,40 @@ namespace Microsoft.ML.Trainers.Online
 
     public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
     {
+        /// <summary>
+        /// Number of training iterations through the data.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
         public int NumIterations = OnlineDefaultArgs.NumIterations;
 
+        /// <summary>
+        /// Initial weights and bias, comma-separated.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Initial Weights and bias, comma-separated", ShortName = "initweights")]
         [TGUI(NoSweep = true)]
         public string InitialWeights;
 
+        /// <summary>
+        /// Initial weights scale.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)]
         [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")]
         [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)]
         public float InitWtsDiameter = 0;
 
+        /// <summary>
+        /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")]
         [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })]
         public bool Shuffle = true;
 
+        /// <summary>
+        /// Size of cache when trained in Scope.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")]
         public int StreamingCacheSize = 1000000;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
index 8e8f5dc2ba..292aeface5 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
@@ -25,44 +25,5 @@
         </code>
       </example>
     </example>
-
-    <member name="AP">
-      <summary>
-        Averaged Perceptron Binary Classifier. 
-      </summary>
-      <remarks>
-        Perceptron is a classification algorithm that makes its predictions based on a linear function.
-        I.e., for an instance with feature values f0, f1,..., f_D-1, , the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm.
-        <para>
-          Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
-          The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
-          If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
-          the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
-          multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
-          and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
-        </para>
-        <para>
-          In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
-          together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
-          The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors.
-        </para>
-        <para> For more information see:</para>
-        <para><a href='https://en.wikipedia.org/wiki/Perceptron'>Wikipedia entry for Perceptron</a></para>
-        <para><a href='https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200'>Large Margin Classification Using the Perceptron Algorithm</a></para>
-      </remarks>
-    </member>
-    <example>
-      <example name="AP">
-        <code language="csharp">
-          new AveragedPerceptronBinaryClassifier
-          {
-            NumIterations = 10,
-            L2RegularizerWeight = 0.01f,
-            LossFunction = new ExpLossClassificationLossFunction()
-          }
-        </code>
-      </example>
-    </example>
-
   </members>
 </doc>
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 6442e4eb01..dd0ffc76c0 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -191,16 +191,37 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer.
+        /// Predict a target using a linear binary classification model trained with averaged perceptron trainer.
         /// </summary>
+        /// <remarks>
+        /// Perceptron is a classification algorithm that makes its predictions based on a linear function.
+        /// For an instance with feature values f_0, f_1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
+        ///
+        /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
+        /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+        /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+        /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
+        /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+        /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
+        ///
+        /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
+        /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
+        /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) of the different weight vectors.
+        ///
+        /// For more information see <a href="https://en.wikipedia.org/wiki/Perceptron">Wikipedia entry for Perceptron</a>
+        /// or <a href="https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200">Large Margin Classification Using the Perceptron Algorithm</a>
+        /// </remarks>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumn">The name of the label column, or dependent variable.</param>
         /// <param name="featureColumn">The features, or independent variables.</param>
-        /// <param name="lossFunction">The custom loss.</param>
+        /// <param name="lossFunction">The custom <a href="tmpurl_loss">loss</a>. If <see langword="null"/>, hinge loss will be used resulting in max-margin averaged perceptron.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="learningRate">The learning Rate.</param>
-        /// <param name="decreaseLearningRate">Decrease learning rate as iterations progress.</param>
-        /// <param name="l2RegularizerWeight">L2 regularization weight.</param>
+        /// <param name="learningRate"><a href="tmpurl_lr">Learning rate</a>.</param>
+        /// <param name="decreaseLearningRate">
+        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </param>
+        /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <param name="numIterations">Number of training iterations through the data.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
@@ -220,10 +241,11 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer.
+        /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options.
+        /// For trainer details, please see the remarks for <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
-        /// <param name="options">Advanced arguments to the algorithm.</param>
+        /// <param name="options">Advanced trainer options.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {

From 4222e85005663c696b96315ee0d2d0e8d84591e6 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Fri, 8 Feb 2019 14:29:34 -0800
Subject: [PATCH 02/14] Added a sample

---
 .../AveragedPerceptron.cs                     | 61 +++++++++++++++++++
 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 21 +++++++
 .../SamplesDatasetUtils.cs                    | 31 ++++++++++
 .../StandardLearnersCatalog.cs                |  9 ++-
 4 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
 create mode 100644 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
new file mode 100644
index 0000000000..8871711b61
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
@@ -0,0 +1,61 @@
+﻿using Microsoft.ML;
+
+namespace Microsoft.ML.Samples.Dynamic.BinaryClassification
+{
+    public static class AveragedPerceptron
+    {
+        public static void Example()
+        {
+            // In this examples we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this examples to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download the dataset and load it as IDataView
+            var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Create data processing pipeline
+            var pipeline =
+                // Convert categorical features to one-hot vectors
+                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                // Combine all features into one feature vector
+                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
+                    "capital-gain", "capital-loss", "hours-per-week"))
+                // Min-max normalized all the features
+                .Append(mlContext.Transforms.Normalize("Features"))
+                // Add the trainer
+                .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"));
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.85
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.91
+            // Positive Precision: 0.69
+            // Positive Recall: 0.63
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
new file mode 100644
index 0000000000..814a251d6a
--- /dev/null
+++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
@@ -0,0 +1,21 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.SamplesUtils
+{
+    public static class ConsoleUtils
+    {
+        public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics)
+        {
+            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
+            Console.WriteLine($"AUC: {metrics.Auc:F2}");
+            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
+            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
+            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
+        }
+    }
+}
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index edaf2d55c5..724f4d10f8 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -81,6 +81,37 @@ public static string DownloadSentimentDataset()
         public static string DownloadAdultDataset()
             => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt");
 
+        public static IDataView LoadAdultDataset(MLContext mlContext)
+        {
+            // Download the file
+            string dataFile = DownloadAdultDataset();
+
+            // Define the columns to read
+            var reader = mlContext.Data.CreateTextLoader(
+                columns: new[]
+                    {
+                        new TextLoader.Column("age", DataKind.R4, 0),
+                        new TextLoader.Column("workclass", DataKind.TX, 1),
+                        new TextLoader.Column("fnlwgt", DataKind.R4, 2),
+                        new TextLoader.Column("education", DataKind.TX, 3),
+                        new TextLoader.Column("education-num", DataKind.R4, 4),
+                        new TextLoader.Column("marital-status", DataKind.TX, 5),
+                        new TextLoader.Column("occupation", DataKind.TX, 6),
+                        new TextLoader.Column("relationship", DataKind.TX, 7),
+                        new TextLoader.Column("ethnicity", DataKind.TX, 8),
+                        new TextLoader.Column("sex", DataKind.TX, 9),
+                        new TextLoader.Column("capital-gain", DataKind.R4, 10),
+                        new TextLoader.Column("capital-loss", DataKind.R4, 11),
+                        new TextLoader.Column("hours-per-week", DataKind.R4, 12),
+                        new TextLoader.Column("native-country", DataKind.R4, 13),
+                        new TextLoader.Column("IsOver50K", DataKind.BL, 14),
+                    },
+                separatorChar: ',',
+                hasHeader: true
+            );
+
+            return reader.Read(dataFile);
+        }
         /// <summary>
         /// Downloads the breast cancer dataset from the ML.NET repo.
         /// </summary>
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index dd0ffc76c0..01bbc7f60a 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -223,6 +223,13 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// </param>
         /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <param name="numIterations">Number of training iterations through the data.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumn = DefaultColumnNames.Label,
@@ -242,7 +249,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
 
         /// <summary>
         /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options.
-        /// For trainer details, please see the remarks for <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+        /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Advanced trainer options.</param>

From f4f03ba7101f16c231eecd58272197278be2a3d8 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 11:23:53 -0800
Subject: [PATCH 03/14] Addressed PR comments

---
 .../AveragedPerceptron.cs                     | 61 -------------------
 .../AveragedPerceptron.cs                     | 45 ++++++++++++++
 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs |  9 ++-
 .../Microsoft.ML.SamplesUtils.csproj          |  2 +
 .../SamplesDatasetUtils.cs                    | 25 +++++++-
 .../Standard/Online/AveragedLinear.cs         | 34 ++++++++---
 .../Standard/Online/AveragedPerceptron.cs     | 12 ++--
 .../Standard/Online/LinearSvm.cs              |  2 +-
 .../Standard/Online/OnlineGradientDescent.cs  |  2 +-
 .../Standard/Online/OnlineLinear.cs           | 33 +++++-----
 .../StandardLearnersCatalog.cs                | 13 ++--
 .../TestPredictors.cs                         |  2 +-
 .../Scenarios/Api/TestApi.cs                  |  2 +-
 test/Microsoft.ML.Tests/Scenarios/OvaTest.cs  |  2 +-
 14 files changed, 137 insertions(+), 107 deletions(-)
 delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
deleted file mode 100644
index 8871711b61..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-﻿using Microsoft.ML;
-
-namespace Microsoft.ML.Samples.Dynamic.BinaryClassification
-{
-    public static class AveragedPerceptron
-    {
-        public static void Example()
-        {
-            // In this examples we will use the adult income dataset. The goal is to predict
-            // if a person's income is above $50K or not, based on different pieces of information about that person.
-            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
-
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this examples to make outputs deterministic.
-            var mlContext = new MLContext(seed: 0);
-
-            // Download the dataset and load it as IDataView
-            var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext);
-
-            // Leave out 10% of data for testing
-            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
-
-            // Create data processing pipeline
-            var pipeline =
-                // Convert categorical features to one-hot vectors
-                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
-                // Combine all features into one feature vector
-                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
-                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
-                    "capital-gain", "capital-loss", "hours-per-week"))
-                // Min-max normalized all the features
-                .Append(mlContext.Transforms.Normalize("Features"))
-                // Add the trainer
-                .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"));
-
-            // Fit this pipeline to the training data
-            var model = pipeline.Fit(trainData);
-
-            // Evaluate how the model is doing on the test data
-            var dataWithPredictions = model.Transform(testData);
-            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
-            SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics);
-
-            // Output:
-            // Accuracy: 0.85
-            // AUC: 0.90
-            // F1 Score: 0.66
-            // Negative Precision: 0.89
-            // Negative Recall: 0.91
-            // Positive Precision: 0.69
-            // Positive Recall: 0.63
-        }
-    }
-}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
new file mode 100644
index 0000000000..35bdefa434
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -0,0 +1,45 @@
+﻿using Microsoft.ML;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class AveragedPerceptron
+    {
+        public static void Example()
+        {
+            // In this example we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Create data training pipeline
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features");
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.85
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.91
+            // Positive Precision: 0.69
+            // Positive Recall: 0.63
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
index 814a251d6a..83fafd8658 100644
--- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
@@ -5,9 +5,16 @@
 
 namespace Microsoft.ML.SamplesUtils
 {
+    /// <summary>
+    /// Utilities for creating console outputs in samples' code.
+    /// </summary>
     public static class ConsoleUtils
     {
-        public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics)
+        /// <summary>
+        /// Pretty-print BinaryClassificationMetrics objects.
+        /// </summary>
+        /// <param name="metrics">Binary classification metrics.</param>
+        public static void PrintMetrics(BinaryClassificationMetrics metrics)
         {
             Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
             Console.WriteLine($"AUC: {metrics.Auc:F2}");
diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
index e4d6c5d504..0bdb047d42 100644
--- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
+++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
@@ -6,7 +6,9 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
+    <ProjectReference Include="..\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index 724f4d10f8..e6f45c0eeb 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -7,6 +7,7 @@
 using System.IO;
 using System.Net;
 using Microsoft.Data.DataView;
+using Microsoft.ML;
 using Microsoft.ML.Data;
 
 namespace Microsoft.ML.SamplesUtils
@@ -81,7 +82,7 @@ public static string DownloadSentimentDataset()
         public static string DownloadAdultDataset()
             => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt");
 
-        public static IDataView LoadAdultDataset(MLContext mlContext)
+        public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
         {
             // Download the file
             string dataFile = DownloadAdultDataset();
@@ -110,8 +111,28 @@ public static IDataView LoadAdultDataset(MLContext mlContext)
                 hasHeader: true
             );
 
-            return reader.Read(dataFile);
+            // Create data featurizing pipeline
+            var pipeline =
+                // Convert categorical features to one-hot vectors
+                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                // Combine all features into one feature vector
+                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
+                    "capital-gain", "capital-loss", "hours-per-week"))
+                // Min-max normalize all the features
+                .Append(mlContext.Transforms.Normalize("Features"));
+
+            var data = reader.Read(dataFile);
+            var featurizedData = pipeline.Fit(data).Transform(data);
+            return featurizedData;
         }
+
         /// <summary>
         /// Downloads the breast cancer dataset from the ML.NET repo.
         /// </summary>
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index 0a73a0ac3b..6460dcc48e 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -19,7 +19,7 @@ namespace Microsoft.ML.Trainers.Online
     public abstract class AveragedLinearArguments : OnlineLinearArguments
     {
         /// <summary>
-        /// <a href="tmpurl_lr">Learning rate</a>
+        /// <a href="tmpurl_lr">Learning rate</a>.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)]
         [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")]
@@ -27,9 +27,12 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
         public float LearningRate = AveragedDefaultArgs.LearningRate;
 
         /// <summary>
-        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
-        /// Default is <see langword="false" />.
+        /// Determines whether to decrease the <see cref="LearningRate"/> or not.
         /// </summary>
+        /// <value>
+        /// <see langword="true" /> to decrease the <see cref="LearningRate"/> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)]
         [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")]
         [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })]
@@ -37,16 +40,21 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
 
         /// <summary>
         /// Number of examples after which weights will be reset to the current average.
-        /// Default is <see langword="null" />, which disables this feature.
         /// </summary>
+        /// <value>
+        /// Default is <see langword="null" />, which disables this feature.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")]
         public long? ResetWeightsAfterXExamples = null;
 
         /// <summary>
+        /// Determines when to update averaged weights.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> to update averaged weights only when loss is nonzero.
         /// <see langword="false" /> to update averaged weights on every example.
         /// Default is <see langword="true" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")]
         public bool DoLazyUpdates = true;
 
@@ -60,23 +68,31 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
 
         /// <summary>
         /// Extra weight given to more recent updates.
-        /// Default is 0, i.e. no extra gain.
         /// </summary>
+        /// <value>
+        /// Default is 0, i.e. no extra gain.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")]
         public float RecencyGain = 0;
 
         /// <summary>
+        /// Determines whether <see cref="RecencyGain"/> is multiplicative or additive.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> means <see cref="RecencyGain"/> is multiplicative.
         /// <see langword="false" /> means <see cref="RecencyGain"/> is additive.
         /// Default is <see langword="false" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")]
         public bool RecencyGainMulti = false;
 
         /// <summary>
+        /// Determines whether to do averaging or not.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> to do averaging; otherwise, <see langword="false" />.
         /// Default is <see langword="true" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")]
         public bool Averaged = true;
 
@@ -84,7 +100,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
         /// The inexactness tolerance for averaging.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")]
-        public float AveragedTolerance = (float)1e-2;
+        internal float AveragedTolerance = (float)1e-2;
 
         [BestFriend]
         internal class AveragedDefaultArgs : OnlineDefaultArgs
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index 2e82f8af5e..8de643f9d3 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -27,8 +27,10 @@ namespace Microsoft.ML.Trainers.Online
 {
     /// <summary>
     /// This is averaged perceptron trainer.
-    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
     /// </summary>
+    /// <remarks>
+    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+    /// </remarks>
     public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
     {
         public const string LoadNameValue = "AveragedPerceptron";
@@ -41,7 +43,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
         public sealed class Options : AveragedLinearArguments
         {
             /// <summary>
-            /// The custom <a href="tmpurl_loss">loss</a>. Default is hinge loss.
+            /// The custom <a href="tmpurl_loss">loss</a>.
             /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments();
@@ -108,9 +110,9 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options)
         /// <param name="featureColumn">The name of the feature column.</param>
         /// <param name="weights">The optional name of the weights column.</param>
         /// <param name="learningRate">The learning rate. </param>
-        /// <param name="decreaseLearningRate">Wheather to decrease learning rate as iterations progress.</param>
+        /// <param name="decreaseLearningRate">Whether to decrease learning rate as iterations progress.</param>
         /// <param name="l2RegularizerWeight">L2 Regularization Weight.</param>
-        /// <param name="numIterations">The number of training iteraitons.</param>
+        /// <param name="numIterations">The number of training iterations.</param>
         internal AveragedPerceptronTrainer(IHostEnvironment env,
             string labelColumn = DefaultColumnNames.Label,
             string featureColumn = DefaultColumnNames.Features,
@@ -128,7 +130,7 @@ internal AveragedPerceptronTrainer(IHostEnvironment env,
                 LearningRate = learningRate,
                 DecreaseLearningRate = decreaseLearningRate,
                 L2RegularizerWeight = l2RegularizerWeight,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
                 LossFunction = new TrivialFactory(lossFunction ?? new HingeLoss())
             })
         {
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
index 184a6554aa..a214ce9505 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
@@ -240,7 +240,7 @@ internal LinearSvmTrainer(IHostEnvironment env,
                 LabelColumn = labelColumn,
                 FeatureColumn = featureColumn,
                 InitialWeights = weightsColumn,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
             })
         {
         }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
index 39ed31a6ec..4c481904c3 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
@@ -115,7 +115,7 @@ internal OnlineGradientDescentTrainer(IHostEnvironment env,
                 LearningRate = learningRate,
                 DecreaseLearningRate = decreaseLearningRate,
                 L2RegularizerWeight = l2RegularizerWeight,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
                 LabelColumn = labelColumn,
                 FeatureColumn = featureColumn,
                 InitialWeights = weightsColumn,
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index 4e1cb6e341..6ba61f77f5 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -26,7 +26,7 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
-        public int NumIterations = OnlineDefaultArgs.NumIterations;
+        public int NumberOfIterations = OnlineDefaultArgs.NumIterations;
 
         /// <summary>
         /// Initial weights and bias, comma-separated.
@@ -36,12 +36,16 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         public string InitialWeights;
 
         /// <summary>
-        /// Initial weights scale.
+        /// Initial weights and bias scale.
         /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)]
+        /// <value>
+        /// This property is only used if the provided value is positive and <see cref="InitialWeights"/> is not specified.
+        /// The weights and bias will be randomly selected from the InitialWeightsDiameter * [-0.5, 0.5] interval with uniform distribution.
+        /// </value>
+        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)]
         [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")]
         [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)]
-        public float InitWtsDiameter = 0;
+        public float InitialWeightsDiameter = 0;
 
         /// <summary>
         /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
@@ -51,12 +55,6 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })]
         public bool Shuffle = true;
 
-        /// <summary>
-        /// Size of cache when trained in Scope.
-        /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")]
-        public int StreamingCacheSize = 1000000;
-
         [BestFriend]
         internal class OnlineDefaultArgs
         {
@@ -151,13 +149,13 @@ protected TrainStateBase(IChannel ch, int numFeatures, LinearModelParameters pre
                     Weights = new VBuffer<float>(numFeatures, weightValues);
                     Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture);
                 }
-                else if (parent.Args.InitWtsDiameter > 0)
+                else if (parent.Args.InitialWeightsDiameter > 0)
                 {
                     var weightValues = new float[numFeatures];
                     for (int i = 0; i < numFeatures; i++)
-                        weightValues[i] = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
+                        weightValues[i] = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
                     Weights = new VBuffer<float>(numFeatures, weightValues);
-                    Bias = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
+                    Bias = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
                 }
                 else if (numFeatures <= 1000)
                     Weights = VBufferUtils.CreateDense<float>(numFeatures);
@@ -255,9 +253,8 @@ private protected OnlineLinearTrainer(OnlineLinearArguments args, IHostEnvironme
             : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(args.InitialWeights))
         {
             Contracts.CheckValue(args, nameof(args));
-            Contracts.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), UserErrorPositive);
-            Contracts.CheckUserArg(args.InitWtsDiameter >= 0, nameof(args.InitWtsDiameter), UserErrorNonNegative);
-            Contracts.CheckUserArg(args.StreamingCacheSize > 0, nameof(args.StreamingCacheSize), UserErrorPositive);
+            Contracts.CheckUserArg(args.NumberOfIterations > 0, nameof(args.NumberOfIterations), UserErrorPositive);
+            Contracts.CheckUserArg(args.InitialWeightsDiameter >= 0, nameof(args.InitialWeightsDiameter), UserErrorNonNegative);
 
             Args = args;
             Name = name;
@@ -307,7 +304,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state)
 
             var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt);
             long numBad = 0;
-            while (state.Iteration < Args.NumIterations)
+            while (state.Iteration < Args.NumberOfIterations)
             {
                 state.BeginIteration(ch);
 
@@ -325,7 +322,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state)
             {
                 ch.Warning(
                     "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)",
-                    numBad, Args.NumIterations, numBad / Args.NumIterations);
+                    numBad, Args.NumberOfIterations, numBad / Args.NumberOfIterations);
             }
         }
 
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 01bbc7f60a..0fe34e2b2e 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -194,11 +194,12 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// Predict a target using a linear binary classification model trained with averaged perceptron trainer.
         /// </summary>
         /// <remarks>
-        /// Perceptron is a classification algorithm that makes its predictions based on a linear function.
-        /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
+        /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
+        /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining which side of the hyperplane the point falls on.
+        /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
         ///
-        /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
-        /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+        /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
+        /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed.
         /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs,
         /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
         /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
@@ -218,7 +219,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// <param name="weights">The optional example weights.</param>
         /// <param name="learningRate"><a href="tmpurl_lr">Learning rate</a>.</param>
         /// <param name="decreaseLearningRate">
-        /// <see langword="true" /> to decrease the <a href="tmpurl_calib">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// <see langword="true" /> to decrease the <paramref name="learningRate"/> as iterations progress; otherwise, <see langword="false" />.
         /// Default is <see langword="false" />.
         /// </param>
         /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
@@ -252,7 +253,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
-        /// <param name="options">Advanced trainer options.</param>
+        /// <param name="options">Trainer options.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {
diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
index 339bede3e5..8d523ade17 100644
--- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
+++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -748,7 +748,7 @@ public void TestEnsembleCombiner()
                 {
                     FeatureColumn = "Features",
                     LabelColumn = DefaultColumnNames.Label,
-                    NumIterations = 2,
+                    NumberOfIterations = 2,
                     TrainingData = dataView,
                     NormalizeFeatures = NormalizeOption.No
                 }).PredictorModel,
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
index bc43d688fb..c762401e74 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
@@ -182,7 +182,7 @@ public void TrainAveragedPerceptronWithCache()
             var cached = mlContext.Data.Cache(xf);
 
             var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
-                new AveragedPerceptronTrainer.Options { NumIterations = 2 });
+                new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 });
 
             estimator.Fit(cached).Transform(cached);
 
diff --git a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
index ecea5411a6..c708bb95b9 100644
--- a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
@@ -133,7 +133,7 @@ public void OvaLinearSvm()
 
             // Pipeline
             var pipeline = mlContext.MulticlassClassification.Trainers.OneVersusAll(
-                mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumIterations = 100 }),
+                mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumberOfIterations = 100 }),
                 useProbabilities: false);
 
             var model = pipeline.Fit(data);

From ceb3aa299b6971b6c40e337facf3a286e0210f04 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 11:47:15 -0800
Subject: [PATCH 04/14] Added sample for the second overload with trainer
 options.

---
 .../AveragedPerceptronWithOptions.cs          | 58 +++++++++++++++++++
 .../StandardLearnersCatalog.cs                |  7 +++
 2 files changed, 65 insertions(+)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
new file mode 100644
index 0000000000..eaf8066398
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -0,0 +1,58 @@
+﻿using Microsoft.ML;
+using Microsoft.ML.Trainers.Online;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class AveragedPerceptronWithOptions
+    {
+        public static void Example()
+        {
+            // In this example we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Define the trainer options
+            var options = new AveragedPerceptronTrainer.Options()
+            {
+                LossFunction = new SmoothedHingeLoss.Arguments(),
+                LearningRate = 0.1f,
+                DoLazyUpdates = false,
+                RecencyGain = 0.1f,
+                NumberOfIterations = 10,
+                LabelColumn = "IsOver50K",
+                FeatureColumn = "Features"
+            };
+
+            // Create data training pipeline
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.86
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.93
+            // Positive Precision: 0.72
+            // Positive Recall: 0.61
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 0fe34e2b2e..d190a3509c 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -254,6 +254,13 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {

From 2b96f6daea01b09dc894b30f7b04f93042c13297 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 14:20:19 -0800
Subject: [PATCH 05/14] Fixed the failing tests

---
 .../Common/EntryPoints/core_manifest.json     | 57 ++++---------------
 .../UnitTests/TestEntryPoints.cs              |  5 +-
 2 files changed, 14 insertions(+), 48 deletions(-)

diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 5154957bf1..5d720ce6de 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -4306,7 +4306,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -4325,11 +4325,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -4467,18 +4468,6 @@
               true
             ]
           }
-        },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
         }
       ],
       "Outputs": [
@@ -13247,7 +13236,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -13266,11 +13255,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -13353,18 +13343,6 @@
             ]
           }
         },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
-        },
         {
           "Name": "BatchSize",
           "Type": "Int",
@@ -14272,7 +14250,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -14291,11 +14269,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -14410,18 +14389,6 @@
               true
             ]
           }
-        },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
         }
       ],
       "Outputs": [
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index 8fcb00842e..7035791227 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -5448,11 +5448,10 @@ public void TestOvaMacroWithUncalibratedLearner()
                                         'RecencyGainMulti': false,
                                         'Averaged': true,
                                         'AveragedTolerance': 0.01,
-                                        'NumIterations': 1,
+                                        'NumberOfIterations': 1,
                                         'InitialWeights': null,
-                                        'InitWtsDiameter': 0.0,
+                                        'InitialWeightsDiameter': 0.0,
                                         'Shuffle': false,
-                                        'StreamingCacheSize': 1000000,
                                         'LabelColumn': 'Label',
                                         'TrainingData': '$Var_9ccc8bce4f6540eb8a244ab40585602a',
                                         'FeatureColumn': 'Features',

From 87be8dc40107662221f7852c181a26c8d58e3a9d Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Thu, 7 Feb 2019 16:00:25 -0800
Subject: [PATCH 06/14] Updated docs for AveragedPerceptron

---
 .../Standard/Online/AveragedLinear.cs         | 35 +++++++++++++++++
 .../Standard/Online/AveragedPerceptron.cs     | 19 ++++++---
 .../Standard/Online/OnlineLinear.cs           | 16 ++++++++
 .../Standard/Online/doc.xml                   | 39 -------------------
 .../StandardLearnersCatalog.cs                | 36 +++++++++++++----
 5 files changed, 93 insertions(+), 52 deletions(-)

diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index 85da68d1f9..a0f156fb74 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -17,36 +17,71 @@ namespace Microsoft.ML.Trainers.Online
 {
     public abstract class AveragedLinearArguments : OnlineLinearArguments
     {
+        /// <summary>
+        /// <a href="tmpurl_lr">Learning rate</a>
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)]
         [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")]
         [TlcModule.SweepableDiscreteParam("LearningRate", new object[] { 0.01, 0.1, 0.5, 1.0 })]
         public float LearningRate = AveragedDefaultArgs.LearningRate;
 
+        /// <summary>
+        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)]
         [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")]
         [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })]
         public bool DecreaseLearningRate = AveragedDefaultArgs.DecreaseLearningRate;
 
+        /// <summary>
+        /// Number of examples after which weights will be reset to the current average.
+        /// Default is <see langword="null" />, which disables this feature.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")]
         public long? ResetWeightsAfterXExamples = null;
 
+        /// <summary>
+        /// <see langword="true" /> to update averaged weights only when loss is nonzero.
+        /// <see langword="false" /> to update averaged weights on every example.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")]
         public bool DoLazyUpdates = true;
 
+        /// <summary>
+        /// L2 weight for <a href='tmpurl_regularization'>regularization</a>.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)]
         [TGUI(Label = "L2 Regularization Weight")]
         [TlcModule.SweepableFloatParam("L2RegularizerWeight", 0.0f, 0.4f)]
         public float L2RegularizerWeight = AveragedDefaultArgs.L2RegularizerWeight;
 
+        /// <summary>
+        /// Extra weight given to more recent updates.
+        /// Default is 0, i.e. no extra gain.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")]
         public float RecencyGain = 0;
 
+        /// <summary>
+        /// <see langword="true" /> means <see cref="RecencyGain"/> is multiplicative.
+        /// <see langword="false" /> means <see cref="RecencyGain"/> is additive.
+        /// Default is <see langword="false" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")]
         public bool RecencyGainMulti = false;
 
+        /// <summary>
+        /// <see langword="true" /> to do averaging; otherwise, <see langword="false" />.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")]
         public bool Averaged = true;
 
+        /// <summary>
+        /// The inexactness tolerance for averaging.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")]
         public float AveragedTolerance = (float)1e-2;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index c3610c7dd1..ac72e2aceb 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -24,12 +24,10 @@
 
 namespace Microsoft.ML.Trainers.Online
 {
-    // This is an averaged perceptron classifier.
-    // Configurable subcomponents:
-    //     - Loss function. By default, hinge loss (aka max-margin avgd perceptron)
-    //     - Feature normalization. By default, rescaling between min and max values for every feature
-    //     - Prediction calibration to produce probabilities. Off by default, if on, uses exponential (aka Platt) calibration.
-    /// <include file='doc.xml' path='doc/members/member[@name="AP"]/*' />
+    /// <summary>
+    /// This is the averaged perceptron trainer.
+    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+    /// </summary>
     public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
     {
         public const string LoadNameValue = "AveragedPerceptron";
@@ -41,12 +39,21 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
 
         public sealed class Options : AveragedLinearArguments
         {
+            /// <summary>
+            /// The custom <a href="tmpurl_loss">loss</a>. Default is hinge loss.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments();
 
+            /// <summary>
+            /// The <a href="tmpurl_calib">calibrator</a> for producing probabilities. Default is exponential (aka Platt) calibration.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
             public ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory();
 
+            /// <summary>
+            /// The maximum number of examples to use when training the calibrator.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of examples to use when training the calibrator", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
             public int MaxCalibrationExamples = 1000000;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index b41890a5fa..a1ff85ae01 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -19,24 +19,40 @@ namespace Microsoft.ML.Trainers.Online
 
     public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
     {
+        /// <summary>
+        /// Number of training iterations through the data.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
         public int NumIterations = OnlineDefaultArgs.NumIterations;
 
+        /// <summary>
+        /// Initial weights and bias, comma-separated.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Initial Weights and bias, comma-separated", ShortName = "initweights")]
         [TGUI(NoSweep = true)]
         public string InitialWeights;
 
+        /// <summary>
+        /// Initial weights scale.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)]
         [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")]
         [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)]
         public float InitWtsDiameter = 0;
 
+        /// <summary>
+        /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
+        /// Default is <see langword="true" />.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")]
         [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })]
         public bool Shuffle = true;
 
+        /// <summary>
+        /// Size of cache when trained in Scope.
+        /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")]
         public int StreamingCacheSize = 1000000;
 
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
index 8e8f5dc2ba..292aeface5 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml
@@ -25,44 +25,5 @@
         </code>
       </example>
     </example>
-
-    <member name="AP">
-      <summary>
-        Averaged Perceptron Binary Classifier. 
-      </summary>
-      <remarks>
-        Perceptron is a classification algorithm that makes its predictions based on a linear function.
-        I.e., for an instance with feature values f0, f1,..., f_D-1, , the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm.
-        <para>
-          Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
-          The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
-          If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
-          the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
-          multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
-          and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
-        </para>
-        <para>
-          In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
-          together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
-          The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors.
-        </para>
-        <para> For more information see:</para>
-        <para><a href='https://en.wikipedia.org/wiki/Perceptron'>Wikipedia entry for Perceptron</a></para>
-        <para><a href='https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200'>Large Margin Classification Using the Perceptron Algorithm</a></para>
-      </remarks>
-    </member>
-    <example>
-      <example name="AP">
-        <code language="csharp">
-          new AveragedPerceptronBinaryClassifier
-          {
-            NumIterations = 10,
-            L2RegularizerWeight = 0.01f,
-            LossFunction = new ExpLossClassificationLossFunction()
-          }
-        </code>
-      </example>
-    </example>
-
   </members>
 </doc>
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index ae03b46bc8..736d5b1165 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -190,16 +190,37 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer.
+        /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer.
         /// </summary>
+        /// <remarks>
+        /// Perceptron is a classification algorithm that makes its predictions based on a linear function.
+        /// For an instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
+        ///
+        /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
+        /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+        /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+        /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
+        /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+        /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
+        ///
+        /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
+        /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
+        /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) of the different weight vectors.
+        ///
+        /// For more information see <a href="https://en.wikipedia.org/wiki/Perceptron">Wikipedia entry for Perceptron</a>
+        /// or <a href="https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200">Large Margin Classification Using the Perceptron Algorithm</a>
+        /// </remarks>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumn">The name of the label column, or dependent variable.</param>
         /// <param name="featureColumn">The features, or independent variables.</param>
-        /// <param name="lossFunction">The custom loss.</param>
+        /// <param name="lossFunction">The custom <a href="tmpurl_loss">loss</a>. If <see langword="null"/>, hinge loss will be used resulting in max-margin averaged perceptron.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="learningRate">The learning Rate.</param>
-        /// <param name="decreaseLearningRate">Decrease learning rate as iterations progress.</param>
-        /// <param name="l2RegularizerWeight">L2 regularization weight.</param>
+        /// <param name="learningRate"><a href="tmpurl_lr">Learning rate</a>.</param>
+        /// <param name="decreaseLearningRate">
+        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </param>
+        /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <param name="numIterations">Number of training iterations through the data.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
@@ -219,10 +240,11 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer.
+        /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options.
+        /// For trainer details, please see the remarks for <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
-        /// <param name="options">Advanced arguments to the algorithm.</param>
+        /// <param name="options">Advanced trainer options.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {

From ba0abff0bade3fb33e2c3f1e634fd0c92e2f8acf Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Fri, 8 Feb 2019 14:29:34 -0800
Subject: [PATCH 07/14] Added a sample

---
 .../AveragedPerceptron.cs                     | 61 +++++++++++++++++++
 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 21 +++++++
 .../SamplesDatasetUtils.cs                    | 31 ++++++++++
 .../StandardLearnersCatalog.cs                |  9 ++-
 4 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
 create mode 100644 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
new file mode 100644
index 0000000000..8871711b61
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
@@ -0,0 +1,61 @@
+﻿using Microsoft.ML;
+
+namespace Microsoft.ML.Samples.Dynamic.BinaryClassification
+{
+    public static class AveragedPerceptron
+    {
+        public static void Example()
+        {
+            // In this example we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download the dataset and load it as IDataView
+            var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Create data processing pipeline
+            var pipeline =
+                // Convert categorical features to one-hot vectors
+                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                // Combine all features into one feature vector
+                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
+                    "capital-gain", "capital-loss", "hours-per-week"))
+                // Min-max normalize all the features
+                .Append(mlContext.Transforms.Normalize("Features"))
+                // Add the trainer
+                .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"));
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.85
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.91
+            // Positive Precision: 0.69
+            // Positive Recall: 0.63
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
new file mode 100644
index 0000000000..814a251d6a
--- /dev/null
+++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
@@ -0,0 +1,21 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.SamplesUtils
+{
+    public static class ConsoleUtils
+    {
+        public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics)
+        {
+            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
+            Console.WriteLine($"AUC: {metrics.Auc:F2}");
+            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
+            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
+            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
+        }
+    }
+}
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index 17ce2e3ab7..e3969ba5e3 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -86,6 +86,37 @@ public static string DownloadSentimentDataset()
         public static string DownloadAdultDataset()
             => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt");
 
+        public static IDataView LoadAdultDataset(MLContext mlContext)
+        {
+            // Download the file
+            string dataFile = DownloadAdultDataset();
+
+            // Define the columns to read
+            var reader = mlContext.Data.CreateTextLoader(
+                columns: new[]
+                    {
+                        new TextLoader.Column("age", DataKind.R4, 0),
+                        new TextLoader.Column("workclass", DataKind.TX, 1),
+                        new TextLoader.Column("fnlwgt", DataKind.R4, 2),
+                        new TextLoader.Column("education", DataKind.TX, 3),
+                        new TextLoader.Column("education-num", DataKind.R4, 4),
+                        new TextLoader.Column("marital-status", DataKind.TX, 5),
+                        new TextLoader.Column("occupation", DataKind.TX, 6),
+                        new TextLoader.Column("relationship", DataKind.TX, 7),
+                        new TextLoader.Column("ethnicity", DataKind.TX, 8),
+                        new TextLoader.Column("sex", DataKind.TX, 9),
+                        new TextLoader.Column("capital-gain", DataKind.R4, 10),
+                        new TextLoader.Column("capital-loss", DataKind.R4, 11),
+                        new TextLoader.Column("hours-per-week", DataKind.R4, 12),
+                        new TextLoader.Column("native-country", DataKind.TX, 13),
+                        new TextLoader.Column("IsOver50K", DataKind.BL, 14),
+                    },
+                separatorChar: ',',
+                hasHeader: true
+            );
+
+            return reader.Read(dataFile);
+        }
         /// <summary>
         /// Downloads the breast cancer dataset from the ML.NET repo.
         /// </summary>
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 736d5b1165..f614c4098c 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -222,6 +222,13 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// </param>
         /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <param name="numIterations">Number of training iterations through the data.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             string labelColumn = DefaultColumnNames.Label,
@@ -241,7 +248,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
 
         /// <summary>
         /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options.
-        /// For trainer details, please see the remarks for <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+        /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Advanced trainer options.</param>

From a5538edea7faaaf2fa93c76c854c366550949c24 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 11:23:53 -0800
Subject: [PATCH 08/14] Addressed PR comments

---
 .../AveragedPerceptron.cs                     | 61 -------------------
 .../AveragedPerceptron.cs                     | 45 ++++++++++++++
 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs |  9 ++-
 .../Microsoft.ML.SamplesUtils.csproj          |  2 +
 .../SamplesDatasetUtils.cs                    | 25 +++++++-
 .../Standard/Online/AveragedLinear.cs         | 34 ++++++++---
 .../Standard/Online/AveragedPerceptron.cs     | 12 ++--
 .../Standard/Online/LinearSvm.cs              |  2 +-
 .../Standard/Online/OnlineGradientDescent.cs  |  2 +-
 .../Standard/Online/OnlineLinear.cs           | 33 +++++-----
 .../StandardLearnersCatalog.cs                | 13 ++--
 .../TestPredictors.cs                         |  2 +-
 .../Scenarios/Api/TestApi.cs                  |  2 +-
 test/Microsoft.ML.Tests/Scenarios/OvaTest.cs  |  2 +-
 14 files changed, 137 insertions(+), 107 deletions(-)
 delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
deleted file mode 100644
index 8871711b61..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-﻿using Microsoft.ML;
-
-namespace Microsoft.ML.Samples.Dynamic.BinaryClassification
-{
-    public static class AveragedPerceptron
-    {
-        public static void Example()
-        {
-            // In this examples we will use the adult income dataset. The goal is to predict
-            // if a person's income is above $50K or not, based on different pieces of information about that person.
-            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
-
-            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
-            // as a catalog of available operations and as the source of randomness.
-            // Setting the seed to a fixed number in this examples to make outputs deterministic.
-            var mlContext = new MLContext(seed: 0);
-
-            // Download the dataset and load it as IDataView
-            var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext);
-
-            // Leave out 10% of data for testing
-            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
-
-            // Create data processing pipeline
-            var pipeline =
-                // Convert categorical features to one-hot vectors
-                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
-                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
-                // Combine all features into one feature vector
-                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
-                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
-                    "capital-gain", "capital-loss", "hours-per-week"))
-                // Min-max normalized all the features
-                .Append(mlContext.Transforms.Normalize("Features"))
-                // Add the trainer
-                .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"));
-
-            // Fit this pipeline to the training data
-            var model = pipeline.Fit(trainData);
-
-            // Evaluate how the model is doing on the test data
-            var dataWithPredictions = model.Transform(testData);
-            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
-            SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics);
-
-            // Output:
-            // Accuracy: 0.85
-            // AUC: 0.90
-            // F1 Score: 0.66
-            // Negative Precision: 0.89
-            // Negative Recall: 0.91
-            // Positive Precision: 0.69
-            // Positive Recall: 0.63
-        }
-    }
-}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
new file mode 100644
index 0000000000..35bdefa434
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -0,0 +1,45 @@
+﻿using Microsoft.ML;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class AveragedPerceptron
+    {
+        public static void Example()
+        {
+            // In this example we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Create data training pipeline
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features");
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.85
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.91
+            // Positive Precision: 0.69
+            // Positive Recall: 0.63
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
index 814a251d6a..83fafd8658 100644
--- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs
@@ -5,9 +5,16 @@
 
 namespace Microsoft.ML.SamplesUtils
 {
+    /// <summary>
+    /// Utilities for creating console outputs in samples' code.
+    /// </summary>
     public static class ConsoleUtils
     {
-        public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics)
+        /// <summary>
+        /// Pretty-print BinaryClassificationMetrics objects.
+        /// </summary>
+        /// <param name="metrics">Binary classification metrics.</param>
+        public static void PrintMetrics(BinaryClassificationMetrics metrics)
         {
             Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
             Console.WriteLine($"AUC: {metrics.Auc:F2}");
diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
index e4d6c5d504..0bdb047d42 100644
--- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
+++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj
@@ -6,7 +6,9 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
+    <ProjectReference Include="..\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index e3969ba5e3..918cd26a9d 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -7,6 +7,7 @@
 using System.IO;
 using System.Net;
 using Microsoft.Data.DataView;
+using Microsoft.ML;
 using Microsoft.ML.Data;
 
 namespace Microsoft.ML.SamplesUtils
@@ -86,7 +87,7 @@ public static string DownloadSentimentDataset()
         public static string DownloadAdultDataset()
             => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt");
 
-        public static IDataView LoadAdultDataset(MLContext mlContext)
+        public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
         {
             // Download the file
             string dataFile = DownloadAdultDataset();
@@ -115,8 +116,28 @@ public static IDataView LoadAdultDataset(MLContext mlContext)
                 hasHeader: true
             );
 
-            return reader.Read(dataFile);
+            // Create data featurizing pipeline
+            var pipeline =
+                // Convert categorical features to one-hot vectors
+                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity"))
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country"))
+                // Combine all features into one feature vector
+                .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status",
+                    "occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
+                    "capital-gain", "capital-loss", "hours-per-week"))
+                // Min-max normalize all the features
+                .Append(mlContext.Transforms.Normalize("Features"));
+
+            var data = reader.Read(dataFile);
+            var featurizedData = pipeline.Fit(data).Transform(data);
+            return featurizedData;
         }
+
         /// <summary>
         /// Downloads the breast cancer dataset from the ML.NET repo.
         /// </summary>
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index a0f156fb74..9234cd2df4 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -18,7 +18,7 @@ namespace Microsoft.ML.Trainers.Online
     public abstract class AveragedLinearArguments : OnlineLinearArguments
     {
         /// <summary>
-        /// <a href="tmpurl_lr">Learning rate</a>
+        /// <a href="tmpurl_lr">Learning rate</a>.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)]
         [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")]
@@ -26,9 +26,12 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
         public float LearningRate = AveragedDefaultArgs.LearningRate;
 
         /// <summary>
-        /// <see langword="true" /> to decrease the <a href="tmpurl_lr">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
-        /// Default is <see langword="false" />.
+        /// Determines whether to decrease the <see cref="LearningRate"/> or not.
         /// </summary>
+        /// <value>
+        /// <see langword="true" /> to decrease the <see cref="LearningRate"/> as iterations progress; otherwise, <see langword="false" />.
+        /// Default is <see langword="false" />.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)]
         [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")]
         [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })]
@@ -36,16 +39,21 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
 
         /// <summary>
         /// Number of examples after which weights will be reset to the current average.
-        /// Default is <see langword="null" />, which disables this feature.
         /// </summary>
+        /// <value>
+        /// Default is <see langword="null" />, which disables this feature.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")]
         public long? ResetWeightsAfterXExamples = null;
 
         /// <summary>
+        /// Determines when to update averaged weights.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> to update averaged weights only when loss is nonzero.
         /// <see langword="false" /> to update averaged weights on every example.
         /// Default is <see langword="true" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")]
         public bool DoLazyUpdates = true;
 
@@ -59,23 +67,31 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
 
         /// <summary>
         /// Extra weight given to more recent updates.
-        /// Default is 0, i.e. no extra gain.
         /// </summary>
+        /// <value>
+        /// Default is 0, i.e. no extra gain.
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")]
         public float RecencyGain = 0;
 
         /// <summary>
+        /// Determines whether <see cref="RecencyGain"/> is multiplicative or additive.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> means <see cref="RecencyGain"/> is multiplicative.
         /// <see langword="false" /> means <see cref="RecencyGain"/> is additive.
         /// Default is <see langword="false" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")]
         public bool RecencyGainMulti = false;
 
         /// <summary>
+        /// Determines whether to do averaging or not.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> to do averaging; otherwise, <see langword="false" />.
         /// Default is <see langword="true" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")]
         public bool Averaged = true;
 
@@ -83,7 +99,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
         /// The inexactness tolerance for averaging.
         /// </summary>
         [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")]
-        public float AveragedTolerance = (float)1e-2;
+        internal float AveragedTolerance = (float)1e-2;
 
         [BestFriend]
         internal class AveragedDefaultArgs : OnlineDefaultArgs
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index ac72e2aceb..d3dbdf619e 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -26,8 +26,10 @@ namespace Microsoft.ML.Trainers.Online
 {
     /// <summary>
     /// This is averaged perceptron trainer.
-    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
     /// </summary>
+    /// <remarks>
+    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+    /// </remarks>
     public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
     {
         public const string LoadNameValue = "AveragedPerceptron";
@@ -40,7 +42,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
         public sealed class Options : AveragedLinearArguments
         {
             /// <summary>
-            /// The custom <a href="tmpurl_loss">loss</a>. Default is hinge loss.
+            /// The custom <a href="tmpurl_loss">loss</a>.
             /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments();
@@ -107,9 +109,9 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options)
         /// <param name="featureColumn">The name of the feature column.</param>
         /// <param name="weights">The optional name of the weights column.</param>
         /// <param name="learningRate">The learning rate. </param>
-        /// <param name="decreaseLearningRate">Wheather to decrease learning rate as iterations progress.</param>
+        /// <param name="decreaseLearningRate">Whether to decrease learning rate as iterations progress.</param>
         /// <param name="l2RegularizerWeight">L2 Regularization Weight.</param>
-        /// <param name="numIterations">The number of training iteraitons.</param>
+        /// <param name="numIterations">The number of training iterations.</param>
         internal AveragedPerceptronTrainer(IHostEnvironment env,
             string labelColumn = DefaultColumnNames.Label,
             string featureColumn = DefaultColumnNames.Features,
@@ -127,7 +129,7 @@ internal AveragedPerceptronTrainer(IHostEnvironment env,
                 LearningRate = learningRate,
                 DecreaseLearningRate = decreaseLearningRate,
                 L2RegularizerWeight = l2RegularizerWeight,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
                 LossFunction = new TrivialFactory(lossFunction ?? new HingeLoss())
             })
         {
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
index e1382b3038..2cd75e623c 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs
@@ -239,7 +239,7 @@ internal LinearSvmTrainer(IHostEnvironment env,
                 LabelColumn = labelColumn,
                 FeatureColumn = featureColumn,
                 InitialWeights = weightsColumn,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
             })
         {
         }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
index 8982127170..b683aefb07 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs
@@ -114,7 +114,7 @@ internal OnlineGradientDescentTrainer(IHostEnvironment env,
                 LearningRate = learningRate,
                 DecreaseLearningRate = decreaseLearningRate,
                 L2RegularizerWeight = l2RegularizerWeight,
-                NumIterations = numIterations,
+                NumberOfIterations = numIterations,
                 LabelColumn = labelColumn,
                 FeatureColumn = featureColumn,
                 InitialWeights = weightsColumn,
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index a1ff85ae01..4dbdc8da57 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -25,7 +25,7 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
-        public int NumIterations = OnlineDefaultArgs.NumIterations;
+        public int NumberOfIterations = OnlineDefaultArgs.NumIterations;
 
         /// <summary>
         /// Initial weights and bias, comma-separated.
@@ -35,12 +35,16 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         public string InitialWeights;
 
         /// <summary>
-        /// Initial weights scale.
+        /// Initial weights and bias scale.
         /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)]
+        /// <value>
+        /// This property is only used if the provided value is positive and <see cref="InitialWeights"/> is not specified.
+        /// The weights and bias will be randomly selected from the <see cref="InitialWeightsDiameter"/> * [-0.5,0.5] interval with uniform distribution.
+        /// </value>
+        [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)]
         [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")]
         [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)]
-        public float InitWtsDiameter = 0;
+        public float InitialWeightsDiameter = 0;
 
         /// <summary>
         /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
@@ -50,12 +54,6 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })]
         public bool Shuffle = true;
 
-        /// <summary>
-        /// Size of cache when trained in Scope.
-        /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")]
-        public int StreamingCacheSize = 1000000;
-
         [BestFriend]
         internal class OnlineDefaultArgs
         {
@@ -150,13 +148,13 @@ protected TrainStateBase(IChannel ch, int numFeatures, LinearModelParameters pre
                     Weights = new VBuffer<float>(numFeatures, weightValues);
                     Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture);
                 }
-                else if (parent.Args.InitWtsDiameter > 0)
+                else if (parent.Args.InitialWeightsDiameter > 0)
                 {
                     var weightValues = new float[numFeatures];
                     for (int i = 0; i < numFeatures; i++)
-                        weightValues[i] = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
+                        weightValues[i] = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
                     Weights = new VBuffer<float>(numFeatures, weightValues);
-                    Bias = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
+                    Bias = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
                 }
                 else if (numFeatures <= 1000)
                     Weights = VBufferUtils.CreateDense<float>(numFeatures);
@@ -254,9 +252,8 @@ private protected OnlineLinearTrainer(OnlineLinearArguments args, IHostEnvironme
             : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(args.InitialWeights))
         {
             Contracts.CheckValue(args, nameof(args));
-            Contracts.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), UserErrorPositive);
-            Contracts.CheckUserArg(args.InitWtsDiameter >= 0, nameof(args.InitWtsDiameter), UserErrorNonNegative);
-            Contracts.CheckUserArg(args.StreamingCacheSize > 0, nameof(args.StreamingCacheSize), UserErrorPositive);
+            Contracts.CheckUserArg(args.NumberOfIterations > 0, nameof(args.NumberOfIterations), UserErrorPositive);
+            Contracts.CheckUserArg(args.InitialWeightsDiameter >= 0, nameof(args.InitialWeightsDiameter), UserErrorNonNegative);
 
             Args = args;
             Name = name;
@@ -315,7 +312,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state)
 
             var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt);
             long numBad = 0;
-            while (state.Iteration < Args.NumIterations)
+            while (state.Iteration < Args.NumberOfIterations)
             {
                 state.BeginIteration(ch);
 
@@ -333,7 +330,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state)
             {
                 ch.Warning(
                     "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)",
-                    numBad, Args.NumIterations, numBad / Args.NumIterations);
+                    numBad, Args.NumberOfIterations, numBad / Args.NumberOfIterations);
             }
         }
 
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index f614c4098c..047ea48440 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -193,11 +193,12 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// Predict a target using a linear binary classification model trained with averaged perceptron trainer.
         /// </summary>
         /// <remarks>
-        /// Perceptron is a classification algorithm that makes its predictions based on a linear function.
-        /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
+        /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
+        /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into.
+        /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
         ///
-        /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time.
-        /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed.
+        /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
+        /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed.
         /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs,
         /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
         /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
@@ -217,7 +218,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// <param name="weights">The optional example weights.</param>
         /// <param name="learningRate"><a href="tmpurl_lr">Learning rate</a>.</param>
         /// <param name="decreaseLearningRate">
-        /// <see langword="true" /> to decrease the <a href="tmpurl_calib">learning rate</a> as iterations progress; otherwise, <see langword="false" />.
+        /// <see langword="true" /> to decrease the <paramref name="learningRate"/> as iterations progress; otherwise, <see langword="false" />.
         /// Default is <see langword="false" />.
         /// </param>
         /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
@@ -251,7 +252,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
-        /// <param name="options">Advanced trainer options.</param>
+        /// <param name="options">Trainer options.</param>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {
diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
index 464fd0dc59..f509c71908 100644
--- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
+++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -744,7 +744,7 @@ public void TestEnsembleCombiner()
                 {
                     FeatureColumn = "Features",
                     LabelColumn = DefaultColumnNames.Label,
-                    NumIterations = 2,
+                    NumberOfIterations = 2,
                     TrainingData = dataView,
                     NormalizeFeatures = NormalizeOption.No
                 }).PredictorModel,
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
index 4b7fdbfaff..ac54587b65 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
@@ -182,7 +182,7 @@ public void TrainAveragedPerceptronWithCache()
             var cached = mlContext.Data.Cache(xf);
 
             var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
-                new AveragedPerceptronTrainer.Options { NumIterations = 2 });
+                new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 });
 
             estimator.Fit(cached).Transform(cached);
 
diff --git a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
index 1ef0cda99c..9954669bd3 100644
--- a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs
@@ -131,7 +131,7 @@ public void OvaLinearSvm()
 
             // Pipeline
             var pipeline = mlContext.MulticlassClassification.Trainers.OneVersusAll(
-                mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumIterations = 100 }),
+                mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumberOfIterations = 100 }),
                 useProbabilities: false);
 
             var model = pipeline.Fit(data);

From 6b5606547497b3bb4a5e81a55523c9e4a41557a9 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 11:47:15 -0800
Subject: [PATCH 09/14] Added sample for the second overload with trainer
 options.

---
 .../AveragedPerceptronWithOptions.cs          | 58 +++++++++++++++++++
 .../StandardLearnersCatalog.cs                |  7 +++
 2 files changed, 65 insertions(+)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
new file mode 100644
index 0000000000..eaf8066398
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -0,0 +1,58 @@
+﻿using Microsoft.ML;
+using Microsoft.ML.Trainers.Online;
+
+namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
+{
+    public static class AveragedPerceptronWithOptions
+    {
+        public static void Example()
+        {
+            // In this examples we will use the adult income dataset. The goal is to predict
+            // if a person's income is above $50K or not, based on different pieces of information about that person.
+            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
+
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
+            // as a catalog of available operations and as the source of randomness.
+            // Setting the seed to a fixed number in this example to make outputs deterministic.
+            var mlContext = new MLContext(seed: 0);
+
+            // Download and featurize the dataset
+            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+            // Leave out 10% of data for testing
+            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+
+            // Define the trainer options
+            var options = new AveragedPerceptronTrainer.Options()
+            {
+                LossFunction = new SmoothedHingeLoss.Arguments(),
+                LearningRate = 0.1f,
+                DoLazyUpdates = false,
+                RecencyGain = 0.1f,
+                NumberOfIterations = 10,
+                LabelColumn = "IsOver50K",
+                FeatureColumn = "Features"
+            };
+
+            // Create data training pipeline
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
+
+            // Fit this pipeline to the training data
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate how the model is doing on the test data
+            var dataWithPredictions = model.Transform(testData);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
+
+            // Output:
+            // Accuracy: 0.86
+            // AUC: 0.90
+            // F1 Score: 0.66
+            // Negative Precision: 0.89
+            // Negative Recall: 0.93
+            // Positive Precision: 0.72
+            // Positive Recall: 0.61
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 047ea48440..d78464acde 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -253,6 +253,13 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptronWithOptions.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static AveragedPerceptronTrainer AveragedPerceptron(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options)
         {

From c48773395579b33500591957ecab58e2beaf4fc5 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 14:20:19 -0800
Subject: [PATCH 10/14] Fixed the failing tests

---
 .../Common/EntryPoints/core_manifest.json     | 57 ++++---------------
 .../UnitTests/TestEntryPoints.cs              |  5 +-
 2 files changed, 14 insertions(+), 48 deletions(-)

diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 5154957bf1..5d720ce6de 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -4306,7 +4306,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -4325,11 +4325,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -4467,18 +4468,6 @@
               true
             ]
           }
-        },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
         }
       ],
       "Outputs": [
@@ -13247,7 +13236,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -13266,11 +13255,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -13353,18 +13343,6 @@
             ]
           }
         },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
-        },
         {
           "Name": "BatchSize",
           "Type": "Int",
@@ -14272,7 +14250,7 @@
           }
         },
         {
-          "Name": "NumIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
@@ -14291,11 +14269,12 @@
           }
         },
         {
-          "Name": "InitWtsDiameter",
+          "Name": "InitialWeightsDiameter",
           "Type": "Float",
           "Desc": "Init weights diameter",
           "Aliases": [
-            "initwts"
+            "initwts",
+            "initWtsDiameter"
           ],
           "Required": false,
           "SortOrder": 140.0,
@@ -14410,18 +14389,6 @@
               true
             ]
           }
-        },
-        {
-          "Name": "StreamingCacheSize",
-          "Type": "Int",
-          "Desc": "Size of cache when trained in Scope",
-          "Aliases": [
-            "cache"
-          ],
-          "Required": false,
-          "SortOrder": 150.0,
-          "IsNullable": false,
-          "Default": 1000000
         }
       ],
       "Outputs": [
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index 8b0c46b817..512329a123 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -5447,11 +5447,10 @@ public void TestOvaMacroWithUncalibratedLearner()
                                         'RecencyGainMulti': false,
                                         'Averaged': true,
                                         'AveragedTolerance': 0.01,
-                                        'NumIterations': 1,
+                                        'NumberOfIterations': 1,
                                         'InitialWeights': null,
-                                        'InitWtsDiameter': 0.0,
+                                        'InitialWeightsDiameter': 0.0,
                                         'Shuffle': false,
-                                        'StreamingCacheSize': 1000000,
                                         'LabelColumn': 'Label',
                                         'TrainingData': '$Var_9ccc8bce4f6540eb8a244ab40585602a',
                                         'FeatureColumn': 'Features',

From 59673b8f5e513768d4d3e83325334fa89f483da8 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Tue, 12 Feb 2019 15:21:15 -0800
Subject: [PATCH 11/14] Fixed breaking changes from master. For sample code,
 changed numIterations to 10 as per Justin's request

---
 .../AveragedPerceptron.cs                     | 21 ++++++++++---------
 .../AveragedPerceptronWithOptions.cs          |  6 +++---
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
index 35bdefa434..ee2e3fdd94 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -19,27 +19,28 @@ public static void Example()
             var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
 
             // Leave out 10% of data for testing
-            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
 
             // Create data training pipeline
-            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features");
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
+                                        "IsOver50K", "Features", numIterations: 10);
 
             // Fit this pipeline to the training data
-            var model = pipeline.Fit(trainData);
+            var model = pipeline.Fit(trainTestData.TrainSet);
 
             // Evaluate how the model is doing on the test data
-            var dataWithPredictions = model.Transform(testData);
+            var dataWithPredictions = model.Transform(trainTestData.TestSet);
             var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
             SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
 
             // Output:
-            // Accuracy: 0.85
-            // AUC: 0.90
-            // F1 Score: 0.66
-            // Negative Precision: 0.89
+            // Accuracy: 0.86
+            // AUC: 0.91
+            // F1 Score: 0.68
+            // Negative Precision: 0.90
             // Negative Recall: 0.91
-            // Positive Precision: 0.69
-            // Positive Recall: 0.63
+            // Positive Precision: 0.70
+            // Positive Recall: 0.66
         }
     }
 }
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
index eaf8066398..ac9296a96d 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -20,7 +20,7 @@ public static void Example()
             var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
 
             // Leave out 10% of data for testing
-            var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
+            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
 
             // Define the trainer options
             var options = new AveragedPerceptronTrainer.Options()
@@ -38,10 +38,10 @@ public static void Example()
             var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
 
             // Fit this pipeline to the training data
-            var model = pipeline.Fit(trainData);
+            var model = pipeline.Fit(trainTestData.TrainSet);
 
             // Evaluate how the model is doing on the test data
-            var dataWithPredictions = model.Transform(testData);
+            var dataWithPredictions = model.Transform(trainTestData.TestSet);
             var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
             SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
 

From 3daf7bdeacdf834fe74f6af2a1cdb94552795bc9 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Wed, 13 Feb 2019 14:06:14 -0800
Subject: [PATCH 12/14] Addressed the PR comments

---
 .../AveragedPerceptron.cs                     | 38 ++++++++---------
 .../AveragedPerceptronWithOptions.cs          | 41 +++++++++----------
 .../SamplesDatasetUtils.cs                    |  4 +-
 .../Standard/Online/AveragedLinear.cs         |  5 ++-
 .../Standard/Online/AveragedPerceptron.cs     |  7 +++-
 .../Standard/Online/OnlineLinear.cs           | 13 ++++--
 .../StandardLearnersCatalog.cs                | 29 ++++++-------
 7 files changed, 72 insertions(+), 65 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
index ee2e3fdd94..767d398dc6 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -4,43 +4,41 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
 {
     public static class AveragedPerceptron
     {
+        // In this examples we will use the adult income dataset. The goal is to predict
+        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
         public static void Example()
         {
-            // In this examples we will use the adult income dataset. The goal is to predict
-            // if a person's income is above $50K or not, based on different pieces of information about that person.
-            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
-
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
             // Setting the seed to a fixed number in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
-            // Download and featurize the dataset
+            // Download and featurize the dataset.
             var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
 
-            // Leave out 10% of data for testing
+            // Leave out 10% of data for testing.
             var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
 
-            // Create data training pipeline
-            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
-                                        "IsOver50K", "Features", numIterations: 10);
+            // Create data training pipeline.
+            var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10);
 
-            // Fit this pipeline to the training data
+            // Fit this pipeline to the training data.
             var model = pipeline.Fit(trainTestData.TrainSet);
 
-            // Evaluate how the model is doing on the test data
+            // Evaluate how the model is doing on the test data.
             var dataWithPredictions = model.Transform(trainTestData.TestSet);
-            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
             SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
 
-            // Output:
-            // Accuracy: 0.86
-            // AUC: 0.91
-            // F1 Score: 0.68
-            // Negative Precision: 0.90
-            // Negative Recall: 0.91
-            // Positive Precision: 0.70
-            // Positive Recall: 0.66
+            // Expected output:
+            //   Accuracy: 0.86
+            //   AUC: 0.91
+            //   F1 Score: 0.68
+            //   Negative Precision: 0.90
+            //   Negative Recall: 0.91
+            //   Positive Precision: 0.70
+            //   Positive Recall: 0.66
         }
     }
 }
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
index ac9296a96d..ee568bff92 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -5,54 +5,51 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
 {
     public static class AveragedPerceptronWithOptions
     {
+        // In this examples we will use the adult income dataset. The goal is to predict
+        // if a person's income is above $50K or not, based on different pieces of information about that person.
+        // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
         public static void Example()
         {
-            // In this examples we will use the adult income dataset. The goal is to predict
-            // if a person's income is above $50K or not, based on different pieces of information about that person.
-            // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult
-
             // Create a new context for ML.NET operations. It can be used for exception tracking and logging, 
             // as a catalog of available operations and as the source of randomness.
             // Setting the seed to a fixed number in this example to make outputs deterministic.
             var mlContext = new MLContext(seed: 0);
 
-            // Download and featurize the dataset
+            // Download and featurize the dataset.
             var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
 
-            // Leave out 10% of data for testing
+            // Leave out 10% of data for testing.
             var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
 
-            // Define the trainer options
+            // Define the trainer options.
             var options = new AveragedPerceptronTrainer.Options()
             {
                 LossFunction = new SmoothedHingeLoss.Arguments(),
                 LearningRate = 0.1f,
                 DoLazyUpdates = false,
                 RecencyGain = 0.1f,
-                NumberOfIterations = 10,
-                LabelColumn = "IsOver50K",
-                FeatureColumn = "Features"
+                NumberOfIterations = 10
             };
 
-            // Create data training pipeline
+            // Create data training pipeline.
             var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
 
-            // Fit this pipeline to the training data
+            // Fit this pipeline to the training data.
             var model = pipeline.Fit(trainTestData.TrainSet);
 
-            // Evaluate how the model is doing on the test data
+            // Evaluate how the model is doing on the test data.
             var dataWithPredictions = model.Transform(trainTestData.TestSet);
-            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
             SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
 
-            // Output:
-            // Accuracy: 0.86
-            // AUC: 0.90
-            // F1 Score: 0.66
-            // Negative Precision: 0.89
-            // Negative Recall: 0.93
-            // Positive Precision: 0.72
-            // Positive Recall: 0.61
+            // Expected output:
+            //  Accuracy: 0.86
+            //  AUC: 0.90
+            //  F1 Score: 0.66
+            //  Negative Precision: 0.89
+            //  Negative Recall: 0.93
+            //  Positive Precision: 0.72
+            //  Positive Recall: 0.61
         }
     }
 }
\ No newline at end of file
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index 918cd26a9d..fb5bdcfe3e 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -117,9 +117,9 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
             );
 
             // Create data featurizing pipeline
-            var pipeline =
+            var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K")
                 // Convert categorical features to one-hot vectors
-                mlContext.Transforms.Categorical.OneHotEncoding("workclass")
+                .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass"))
                 .Append(mlContext.Transforms.Categorical.OneHotEncoding("education"))
                 .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status"))
                 .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation"))
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
index 9234cd2df4..f5919f8b47 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -15,6 +15,9 @@
 
 namespace Microsoft.ML.Trainers.Online
 {
+    /// <summary>
+    /// Arguments class for averaged linear trainers.
+    /// </summary>
     public abstract class AveragedLinearArguments : OnlineLinearArguments
     {
         /// <summary>
@@ -30,7 +33,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments
         /// </summary>
         /// <value>
         /// <see langword="true" /> to decrease the <see cref="LearningRate"/> as iterations progress; otherwise, <see langword="false" />.
-        /// Default is <see langword="false" />.
+        /// Default is <see langword="false" />. The learning rate will be reduced with every weight update proportional to the square root of the number of updates.
         /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)]
         [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")]
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index d3dbdf619e..29b2573ffe 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -25,7 +25,7 @@
 namespace Microsoft.ML.Trainers.Online
 {
     /// <summary>
-    /// This is averaged perceptron trainer.
+    /// The <see cref="IEstimator{TTransformer}"/> for the averaged perceptron trainer.
     /// </summary>
     /// <remarks>
     /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
@@ -39,10 +39,13 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPred
 
         private readonly Options _args;
 
+        /// <summary>
+        /// Options for the averaged perceptron trainer.
+        /// </summary>
         public sealed class Options : AveragedLinearArguments
         {
             /// <summary>
-            /// The custom <a href="tmpurl_loss">loss</a>.
+            /// A custom <a href="tmpurl_loss">loss</a>.
             /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments();
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
index 4dbdc8da57..4790700a0d 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs
@@ -16,13 +16,15 @@
 
 namespace Microsoft.ML.Trainers.Online
 {
-
+    /// <summary>
+    /// Arguments class for online linear trainers.
+    /// </summary>
     public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
     {
         /// <summary>
-        /// Number of training iterations through the data.
+        /// Number of passes through the training dataset.
         /// </summary>
-        [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)]
+        [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter, numIterations", SortOrder = 50)]
         [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")]
         [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)]
         public int NumberOfIterations = OnlineDefaultArgs.NumIterations;
@@ -47,9 +49,12 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel
         public float InitialWeightsDiameter = 0;
 
         /// <summary>
+        /// Determines whether to shuffle data for each training iteration.
+        /// </summary>
+        /// <value>
         /// <see langword="true" /> to shuffle data for each training iteration; otherwise, <see langword="false" />.
         /// Default is <see langword="true" />.
-        /// </summary>
+        /// </value>
         [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")]
         [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })]
         public bool Shuffle = true;
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index d78464acde..b12340f4f1 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -190,23 +190,22 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with averaged perceptron trainer.
+        /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer.
         /// </summary>
         /// <remarks>
-        /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
+        /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
         /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into.
         /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
         ///
         /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
         /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed.
-        /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs,
-        /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example,
-        /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+        /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+        /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example,
+        /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
         /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
         ///
-        /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored,
-        /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not).
-        /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors.
+        /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above.
+        /// The final prediction is then calculate by averaging the weighted sum from each weight vector and looking at the sign of the result.
         ///
         /// For more information see <a href="https://en.wikipedia.org/wiki/Perceptron">Wikipedia entry for Perceptron</a>
         /// or <a href="https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200">Large Margin Classification Using the Perceptron Algorithm</a>
@@ -214,7 +213,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumn">The name of the label column, or dependent variable.</param>
         /// <param name="featureColumn">The features, or independent variables.</param>
-        /// <param name="lossFunction">The custom <a href="tmpurl_loss">loss</a>. If <see langword="null"/>, hinge loss will be used resulting in max-margin averaged perceptron.</param>
+        /// <param name="lossFunction">A custom <a href="tmpurl_loss">loss</a>. If <see langword="null"/>, hinge loss will be used resulting in max-margin averaged perceptron.</param>
         /// <param name="weights">The optional example weights.</param>
         /// <param name="learningRate"><a href="tmpurl_lr">Learning rate</a>.</param>
         /// <param name="decreaseLearningRate">
@@ -222,11 +221,11 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         /// Default is <see langword="false" />.
         /// </param>
         /// <param name="l2RegularizerWeight">L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
-        /// <param name="numIterations">Number of training iterations through the data.</param>
+        /// <param name="numIterations">Number of passes through the training dataset.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs)]
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -248,15 +247,17 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options.
-        /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+        /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer using advanced options.
         /// </summary>
+        /// <remarks>
+        /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+        /// </remarks>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptronWithOptions.cs)]
+        /// [!code-csharp[AveragedPerceptron](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs)]
         /// ]]>
         /// </format>
         /// </example>

From b10d7e6aa2c5eac720c508055f976ae83947da49 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Wed, 13 Feb 2019 14:14:37 -0800
Subject: [PATCH 13/14] Moved algorithm remarks to the estimator class.

---
 .../Standard/Online/AveragedPerceptron.cs     | 17 ++++++++++++-
 .../StandardLearnersCatalog.cs                | 25 ++-----------------
 .../Common/EntryPoints/core_manifest.json     |  9 ++++---
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
index 29b2573ffe..e7348f096b 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs
@@ -28,7 +28,22 @@ namespace Microsoft.ML.Trainers.Online
     /// The <see cref="IEstimator{TTransformer}"/> for the averaged perceptron trainer.
     /// </summary>
     /// <remarks>
-    /// For usage details, please see <see cref="StandardLearnersCatalog.AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
+    /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
+    /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into.
+    /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
+    ///
+    /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
+    /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed.
+    /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
+    /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example,
+    /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
+    /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
+    ///
+    /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above.
+    /// The final prediction is then calculated by averaging the weighted sum from each weight vector and looking at the sign of the result.
+    ///
+    /// For more information, see <a href="https://en.wikipedia.org/wiki/Perceptron">Wikipedia entry for Perceptron</a>
+    /// or <a href="https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200">Large Margin Classification Using the Perceptron Algorithm</a>.
     /// </remarks>
     public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer<BinaryPredictionTransformer<LinearBinaryModelParameters>, LinearBinaryModelParameters>
     {
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index b12340f4f1..a59ef319c9 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -190,26 +190,8 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer.
+        /// Predict a target using a linear binary classification model trained with <see cref="AveragedPerceptronTrainer"/>.
         /// </summary>
-        /// <remarks>
-        /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane.
-        /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into.
-        /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm.
-        ///
-        /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
-        /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed.
-        /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs,
-        /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example,
-        /// multiplied by a factor 0 &lt; a &lt;= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate,
-        /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero).
-        ///
-        /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above.
-        /// The final prediction is then calculate by averaging the weighted sum from each weight vector and looking at the sign of the result.
-        ///
-        /// For more information see <a href="https://en.wikipedia.org/wiki/Perceptron">Wikipedia entry for Perceptron</a>
-        /// or <a href="https://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.48.8200">Large Margin Classification Using the Perceptron Algorithm</a>
-        /// </remarks>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumn">The name of the label column, or dependent variable.</param>
         /// <param name="featureColumn">The features, or independent variables.</param>
@@ -247,11 +229,8 @@ public static AveragedPerceptronTrainer AveragedPerceptron(
         }
 
         /// <summary>
-        /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer using advanced options.
+        /// Predict a target using a linear binary classification model trained with <see cref="AveragedPerceptronTrainer"/> and advanced options.
         /// </summary>
-        /// <remarks>
-        /// For usage details, please see <see cref="AveragedPerceptron(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, float, bool, float, int)"/>
-        /// </remarks>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
         /// <example>
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 5d720ce6de..a6e3b29dd9 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -4310,7 +4310,8 @@
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
-            "iter"
+            "iter",
+            "numIterations"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -13240,7 +13241,8 @@
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
-            "iter"
+            "iter",
+            "numIterations"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -14254,7 +14256,8 @@
           "Type": "Int",
           "Desc": "Number of iterations",
           "Aliases": [
-            "iter"
+            "iter",
+            "numIterations"
           ],
           "Required": false,
           "SortOrder": 50.0,

From 056a88792e4633699ee1c4d59477c0455d8276a3 Mon Sep 17 00:00:00 2001
From: Shahab Moradi <shmoradi@microsoft.com>
Date: Thu, 14 Feb 2019 12:30:49 -0800
Subject: [PATCH 14/14] Added doc xml docs

---
 src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index 1ce4d5810d..203bd6e6bd 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -87,6 +87,14 @@ public static string DownloadSentimentDataset()
         public static string DownloadAdultDataset()
             => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt");
 
+        /// <summary>
+        /// Downloads the Adult UCI dataset and featurizes it to be suitable for classification tasks.
+        /// </summary>
+        /// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param>
+        /// <returns>Featurized dataset.</returns>
+        /// <remarks>
+        /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
+        /// </remarks>
         public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
         {
             // Download the file