From 6541244cbbebd27958415c90798d007dc85d92fa Mon Sep 17 00:00:00 2001
From: Abhishek Goswami <abgoswam@gmail.com>
Date: Sun, 3 Mar 2019 18:53:55 +0000
Subject: [PATCH 1/4] sgd binary trainer

---
 ...GradientDescentNonCalibratedWithOptions.cs |  6 +-
 .../StochasticGradientDescentWithOptions.cs   |  2 +-
 .../Prediction/Calibrator.cs                  |  2 +-
 .../Scorers/PredictionTransformer.cs          | 19 ++--
 .../Standard/LinearModelParameters.cs         |  5 +-
 .../Standard/SdcaBinary.cs                    | 89 ++++++++++---------
 .../StandardLearnersCatalog.cs                | 12 +--
 src/Microsoft.ML.StaticPipe/SgdStatic.cs      | 12 +--
 .../Common/EntryPoints/core_manifest.json     | 20 +++--
 .../Training.cs                               |  4 +-
 .../FeatureContributionTests.cs               |  2 +-
 .../TrainerEstimators/TrainerEstimators.cs    |  2 +-
 12 files changed, 93 insertions(+), 82 deletions(-)
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs
index 8746d971a2..4703fa87c7 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs
@@ -28,9 +28,9 @@ public static void Example()
                 .Trainers.StochasticGradientDescentNonCalibrated(
                         new SgdNonCalibratedBinaryTrainer.Options
                         {
-                            InitLearningRate = 0.01,
-                            MaxIterations = 10,
-                            L2Weight = 1e-7f
+                            InitialLearningRate = 0.01,
+                            NumberOfIterations = 10,
+                            L2Regularization = 1e-7f
                         }
                        );
 
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs
index d28e0a19d1..1d74daba1b 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs
@@ -27,7 +27,7 @@ public static void Example()
                 // Make the convergence tolerance tighter.
                 ConvergenceTolerance = 5e-5,
                 // Increase the maximum number of passes over training data.
-                MaxIterations = 30,
+                NumberOfIterations = 30,
                 // Give the instances of the positive class slightly more weight.
                 PositiveInstanceWeight = 1.2f,
             };
diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs
index 46e8063df0..d72cb120c7 100644
--- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs
+++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs
@@ -173,7 +173,7 @@ public abstract class CalibratedModelParametersBase<TSubModel, TCalibrator> :
         where TSubModel : class
         where TCalibrator : class, ICalibrator
     {
-        protected readonly IHost Host;
+        private protected readonly IHost Host;
 
         // Strongly-typed members.
         /// <summary>
diff --git a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs
index 7b94c92795..2aa4bd2e9d 100644
--- a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs
+++ b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs
@@ -52,7 +52,8 @@ public abstract class PredictionTransformerBase<TModel> : IPredictionTransformer
         private protected readonly IHost Host;
         [BestFriend]
         private protected ISchemaBindableMapper BindableMapper;
-        protected DataViewSchema TrainSchema;
+        [BestFriend]
+        private protected DataViewSchema TrainSchema;
 
         /// <summary>
         /// Whether a call to <see cref="ITransformer.GetRowToRowMapper(DataViewSchema)"/> should succeed, on an
@@ -142,7 +143,7 @@ IRowToRowMapper ITransformer.GetRowToRowMapper(DataViewSchema inputSchema)
 
         private protected abstract void SaveModel(ModelSaveContext ctx);
 
-        protected void SaveModelCore(ModelSaveContext ctx)
+        private protected void SaveModelCore(ModelSaveContext ctx)
         {
             // *** Binary format ***
             // <base info>
@@ -241,7 +242,7 @@ private protected override void SaveModel(ModelSaveContext ctx)
             SaveCore(ctx);
         }
 
-        protected virtual void SaveCore(ModelSaveContext ctx)
+        private protected virtual void SaveCore(ModelSaveContext ctx)
         {
             SaveModelCore(ctx);
             ctx.SaveStringOrNull(FeatureColumn);
@@ -296,7 +297,7 @@ private void SetScorer()
             Scorer = new BinaryClassifierScorer(Host, args, new EmptyDataView(Host, TrainSchema), BindableMapper.Bind(Host, schema), schema);
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
@@ -365,7 +366,7 @@ private void SetScorer()
             Scorer = new BinaryClassifierScorer(Host, args, new EmptyDataView(Host, TrainSchema), BindableMapper.Bind(Host, schema), schema);
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
@@ -429,7 +430,7 @@ private void SetScorer()
             Scorer = new MultiClassClassifierScorer(Host, args, new EmptyDataView(Host, TrainSchema), BindableMapper.Bind(Host, schema), schema);
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
@@ -474,7 +475,7 @@ internal RegressionPredictionTransformer(IHostEnvironment env, ModelLoadContext
             Scorer = GetGenericScorer();
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
@@ -516,7 +517,7 @@ internal RankingPredictionTransformer(IHostEnvironment env, ModelLoadContext ctx
             Scorer = GetGenericScorer();
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
@@ -568,7 +569,7 @@ internal ClusteringPredictionTransformer(IHostEnvironment env, ModelLoadContext
             Scorer = new ClusteringScorer(Host, args, new EmptyDataView(Host, TrainSchema), BindableMapper.Bind(Host, schema), schema);
         }
 
-        protected override void SaveCore(ModelSaveContext ctx)
+        private protected override void SaveCore(ModelSaveContext ctx)
         {
             Contracts.AssertValue(ctx);
             ctx.SetVersionInfo(GetVersionInfo());
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
index 17c1eadc36..2fa9718519 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
@@ -261,7 +261,7 @@ private protected virtual float Score(in VBuffer<float> src)
             return Bias + VectorUtils.DotProduct(in _weightsDense, in src);
         }
 
-        protected virtual void GetFeatureContributions(in VBuffer<float> features, ref VBuffer<float> contributions, int top, int bottom, bool normalize)
+        private protected virtual void GetFeatureContributions(in VBuffer<float> features, ref VBuffer<float> contributions, int top, int bottom, bool normalize)
         {
             if (features.Length != Weight.Length)
                 throw Contracts.Except("Input is of length {0} does not match expected length  of weights {1}", features.Length, Weight.Length);
@@ -402,6 +402,9 @@ ValueMapper<TSrc, VBuffer<float>> IFeatureContributionMapper.GetFeatureContribut
         }
     }
 
+    /// <summary>
+    /// The model parameters class for linear binary trainer estimators.
+    /// </summary>
     public sealed partial class LinearBinaryModelParameters : LinearModelParameters,
         ICanGetSummaryInKeyValuePairs,
         IParameterMixer<float>
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
index 7e1526c1c2..1d9d782076 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
@@ -1726,10 +1726,10 @@ public class OptionsBase : TrainerInputBaseWithWeight
             /// <summary>
             /// The L2 weight for <a href='tmpurl_regularization'>regularization</a>.
             /// </summary>
-            [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization constant", ShortName = "l2", SortOrder = 50)]
+            [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization constant", ShortName = "l2, L2Weight", SortOrder = 50)]
             [TGUI(Label = "L2 Regularization Constant", SuggestedSweeps = "1e-7,5e-7,1e-6,5e-6,1e-5")]
             [TlcModule.SweepableDiscreteParam("L2Const", new object[] { 1e-7f, 5e-7f, 1e-6f, 5e-6f, 1e-5f })]
-            public float L2Weight = Defaults.L2Weight;
+            public float L2Regularization = Defaults.L2Regularization;
 
             /// <summary>
             /// The degree of lock-free parallelism used by SGD.
@@ -1737,9 +1737,9 @@ public class OptionsBase : TrainerInputBaseWithWeight
             /// <value>
             /// Defaults to automatic depending on data sparseness. Determinism is not guaranteed.
             /// </value>
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.", ShortName = "nt,t,threads", SortOrder = 50)]
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.", ShortName = "nt,t,threads, NumThreads", SortOrder = 50)]
             [TGUI(Label = "Number of threads", SuggestedSweeps = "1,2,4")]
-            public int? NumThreads;
+            public int? NumberOfThreads;
 
             /// <summary>
             /// The convergence tolerance. If the exponential moving average of loss reductions falls below this tolerance,
@@ -1756,17 +1756,17 @@ public class OptionsBase : TrainerInputBaseWithWeight
             /// <value>
             /// Set to 1 to simulate online learning.
             /// </value>
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning.", ShortName = "iter")]
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning.", ShortName = "iter, MaxIterations")]
             [TGUI(Label = "Max number of iterations", SuggestedSweeps = "1,5,10,20")]
             [TlcModule.SweepableDiscreteParam("MaxIterations", new object[] { 1, 5, 10, 20 })]
-            public int MaxIterations = Defaults.MaxIterations;
+            public int NumberOfIterations = Defaults.NumberOfIterations;
 
             /// <summary>
             /// The initial <a href="tmpurl_lr">learning rate</a> used by SGD.
             /// </summary>
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate (only used by SGD)", ShortName = "ilr,lr")]
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate (only used by SGD)", ShortName = "ilr,lr, InitLearningRate")]
             [TGUI(Label = "Initial Learning Rate (for SGD)")]
-            public double InitLearningRate = Defaults.InitLearningRate;
+            public double InitialLearningRate = Defaults.InitialLearningRate;
 
             /// <summary>
             /// Determines whether to shuffle data for each training iteration.
@@ -1792,7 +1792,7 @@ public class OptionsBase : TrainerInputBaseWithWeight
             /// Determines the frequency of checking for convergence in terms of number of iterations.
             /// </summary>
             /// <value>
-            /// Default equals <see cref="NumThreads"/>."
+            /// Default equals <see cref="NumberOfThreads"/>.
             /// </value>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Default equals number of threads", ShortName = "checkFreq")]
             public int? CheckFrequency;
@@ -1800,17 +1800,17 @@ public class OptionsBase : TrainerInputBaseWithWeight
             internal void Check(IHostEnvironment env)
             {
                 Contracts.CheckValue(env, nameof(env));
-                env.CheckUserArg(L2Weight >= 0, nameof(L2Weight), "Must be non-negative.");
-                env.CheckUserArg(InitLearningRate > 0, nameof(InitLearningRate), "Must be positive.");
-                env.CheckUserArg(MaxIterations > 0, nameof(MaxIterations), "Must be positive.");
+                env.CheckUserArg(L2Regularization >= 0, nameof(L2Regularization), "Must be non-negative.");
+                env.CheckUserArg(InitialLearningRate > 0, nameof(InitialLearningRate), "Must be positive.");
+                env.CheckUserArg(NumberOfIterations > 0, nameof(NumberOfIterations), "Must be positive.");
                 env.CheckUserArg(PositiveInstanceWeight > 0, nameof(PositiveInstanceWeight), "Must be positive");
 
-                if (InitLearningRate * L2Weight >= 1)
+                if (InitialLearningRate * L2Regularization >= 1)
                 {
                     using (var ch = env.Start("Argument Adjustment"))
                     {
-                        ch.Warning("{0} {1} set too high; reducing to {1}", nameof(InitLearningRate),
-                            InitLearningRate, InitLearningRate = (float)0.5 / L2Weight);
+                        ch.Warning("{0} {1} set too high; reducing to {2}", nameof(InitialLearningRate),
+                            InitialLearningRate, InitialLearningRate = (float)0.5 / L2Regularization); // {1} = old rate, {2} = reduced rate
                     }
                 }
 
@@ -1821,9 +1821,9 @@ internal void Check(IHostEnvironment env)
             [BestFriend]
             internal static class Defaults
             {
-                public const float L2Weight = 1e-6f;
-                public const int MaxIterations = 20;
-                public const double InitLearningRate = 0.01;
+                public const float L2Regularization = 1e-6f;
+                public const int NumberOfIterations = 20;
+                public const double InitialLearningRate = 0.01;
             }
         }
 
@@ -1853,18 +1853,18 @@ internal SgdBinaryTrainerBase(IHostEnvironment env,
             string featureColumn = DefaultColumnNames.Features,
             string weightColumn = null,
             IClassificationLoss loss = null,
-            int maxIterations = OptionsBase.Defaults.MaxIterations,
-            double initLearningRate = OptionsBase.Defaults.InitLearningRate,
-            float l2Weight = OptionsBase.Defaults.L2Weight)
+            int maxIterations = OptionsBase.Defaults.NumberOfIterations,
+            double initLearningRate = OptionsBase.Defaults.InitialLearningRate,
+            float l2Weight = OptionsBase.Defaults.L2Regularization)
             : base(env, featureColumn, TrainerUtils.MakeBoolScalarLabel(labelColumn), weightColumn)
         {
             Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
             Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
 
             _options = new OptionsBase();
-            _options.MaxIterations = maxIterations;
-            _options.InitLearningRate = initLearningRate;
-            _options.L2Weight = l2Weight;
+            _options.NumberOfIterations = maxIterations;
+            _options.InitialLearningRate = initLearningRate;
+            _options.L2Regularization = l2Weight;
 
             _options.FeatureColumnName = featureColumn;
             _options.LabelColumnName = labelColumn;
@@ -1917,10 +1917,10 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
             var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt);
 
             int numThreads;
-            if (_options.NumThreads.HasValue)
+            if (_options.NumberOfThreads.HasValue)
             {
-                numThreads = _options.NumThreads.Value;
-                ch.CheckUserArg(numThreads > 0, nameof(_options.NumThreads), "The number of threads must be either null or a positive integer.");
+                numThreads = _options.NumberOfThreads.Value;
+                ch.CheckUserArg(numThreads > 0, nameof(_options.NumberOfThreads), "The number of threads must be either null or a positive integer.");
             }
             else
                 numThreads = ComputeNumThreads(cursorFactory);
@@ -1929,7 +1929,7 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
             int checkFrequency = _options.CheckFrequency ?? numThreads;
             if (checkFrequency <= 0)
                 checkFrequency = int.MaxValue;
-            var l2Weight = _options.L2Weight;
+            var l2Weight = _options.L2Regularization;
             var lossFunc = Loss;
             var pOptions = new ParallelOptions { MaxDegreeOfParallelism = numThreads };
             var positiveInstanceWeight = _options.PositiveInstanceWeight;
@@ -1954,7 +1954,7 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
             // REVIEW: Investigate using parallel row cursor set instead of getting cursor independently. The convergence of SDCA need to be verified.
             Action<int, IProgressChannel> checkConvergence = (e, pch) =>
             {
-                if (e % checkFrequency == 0 && e != _options.MaxIterations)
+                if (e % checkFrequency == 0 && e != _options.NumberOfIterations)
                 {
                     Double trainTime = watch.Elapsed.TotalSeconds;
                     var lossSum = new CompensatedSum();
@@ -1980,7 +1980,7 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
                     improvement = improvement == 0 ? loss - newLoss : 0.5 * (loss - newLoss + improvement);
                     loss = newLoss;
 
-                    pch.Checkpoint(loss, improvement, e, _options.MaxIterations);
+                    pch.Checkpoint(loss, improvement, e, _options.NumberOfIterations);
                     converged = improvement < _options.ConvergenceTolerance;
                 }
             };
@@ -1990,11 +1990,11 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
             //Reference: Leon Bottou. Stochastic Gradient Descent Tricks.
             //https://research.microsoft.com/pubs/192769/tricks-2012.pdf
 
-            var trainingTasks = new Action<Random, IProgressChannel>[_options.MaxIterations];
-            var rands = new Random[_options.MaxIterations];
-            var ilr = _options.InitLearningRate;
+            var trainingTasks = new Action<Random, IProgressChannel>[_options.NumberOfIterations];
+            var rands = new Random[_options.NumberOfIterations];
+            var ilr = _options.InitialLearningRate;
             long t = 0;
-            for (int epoch = 1; epoch <= _options.MaxIterations; epoch++)
+            for (int epoch = 1; epoch <= _options.NumberOfIterations; epoch++)
             {
                 int e = epoch; //localize the modified closure
                 rands[e - 1] = RandomUtils.Create(Host.Rand.Next());
@@ -2055,9 +2055,9 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
                 {
                     int iter = 0;
                     pch.SetHeader(new ProgressHeader(new[] { "Loss", "Improvement" }, new[] { "iterations" }),
-                        entry => entry.SetProgress(0, iter, _options.MaxIterations));
+                        entry => entry.SetProgress(0, iter, _options.NumberOfIterations));
                     // Synchorized SGD.
-                    for (int i = 0; i < _options.MaxIterations; i++)
+                    for (int i = 0; i < _options.NumberOfIterations; i++)
                     {
                         iter = i;
                         trainingTasks[i](rands[i], pch);
@@ -2075,7 +2075,7 @@ private protected override TModel TrainCore(IChannel ch, RoleMappedData data, Li
                             // REVIEW: technically, we could keep track of how many iterations have started,
                             // but this needs more synchronization than Parallel.For allows.
                         });
-                    Parallel.For(0, _options.MaxIterations, pOptions, i => trainingTasks[i](rands[i], pch));
+                    Parallel.For(0, _options.NumberOfIterations, pOptions, i => trainingTasks[i](rands[i], pch));
                     //note that P.Invoke will wait until all tasks finish
                 }
             }
@@ -2142,9 +2142,9 @@ internal SgdBinaryTrainer(IHostEnvironment env,
             string labelColumn = DefaultColumnNames.Label,
             string featureColumn = DefaultColumnNames.Features,
             string weightColumn = null,
-            int maxIterations = Options.Defaults.MaxIterations,
-            double initLearningRate = Options.Defaults.InitLearningRate,
-            float l2Weight = Options.Defaults.L2Weight)
+            int maxIterations = Options.Defaults.NumberOfIterations,
+            double initLearningRate = Options.Defaults.InitialLearningRate,
+            float l2Weight = Options.Defaults.L2Regularization)
             : base(env, labelColumn, featureColumn, weightColumn, new LogLoss(), maxIterations, initLearningRate, l2Weight)
         {
         }
@@ -2196,6 +2196,9 @@ public sealed class SgdNonCalibratedBinaryTrainer :
     {
         public sealed class Options : OptionsBase
         {
+            /// <summary>
+            /// The loss function to use. Default is <see cref="LogLoss"/>.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public IClassificationLoss Loss = new LogLoss();
         }
@@ -2204,9 +2207,9 @@ internal SgdNonCalibratedBinaryTrainer(IHostEnvironment env,
             string labelColumn = DefaultColumnNames.Label,
             string featureColumn = DefaultColumnNames.Features,
             string weightColumn = null,
-            int maxIterations = Options.Defaults.MaxIterations,
-            double initLearningRate = Options.Defaults.InitLearningRate,
-            float l2Weight = Options.Defaults.L2Weight,
+            int maxIterations = Options.Defaults.NumberOfIterations,
+            double initLearningRate = Options.Defaults.InitialLearningRate,
+            float l2Weight = Options.Defaults.L2Regularization,
             IClassificationLoss loss = null)
             : base(env, labelColumn, featureColumn, weightColumn, loss, maxIterations, initLearningRate, l2Weight)
         {
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 20f672d8fd..1551d83124 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -36,9 +36,9 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
             string exampleWeightColumnName = null,
-            int maxIterations = SgdBinaryTrainer.Options.Defaults.MaxIterations,
-            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitLearningRate,
-            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Weight)
+            int maxIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Regularization)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
@@ -91,9 +91,9 @@ public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrat
             string featureColumnName = DefaultColumnNames.Features,
             string exampleWeightColumnName = null,
             IClassificationLoss loss = null,
-            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.MaxIterations,
-            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitLearningRate,
-            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Weight)
+            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
diff --git a/src/Microsoft.ML.StaticPipe/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
index dad7e9b57f..032211bac2 100644
--- a/src/Microsoft.ML.StaticPipe/SgdStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
@@ -34,9 +34,9 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
             Scalar<bool> label,
             Vector<float> features,
             Scalar<float> weights = null,
-            int maxIterations = SgdBinaryTrainer.Options.Defaults.MaxIterations,
-            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitLearningRate,
-            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Weight,
+            int maxIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Regularization,
             Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
         {
             var rec = new TrainerEstimatorReconciler.BinaryClassifier(
@@ -115,9 +115,9 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) StochasticGradi
             Scalar<bool> label,
             Vector<float> features,
             Scalar<float> weights = null,
-            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.MaxIterations,
-            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitLearningRate,
-            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Weight,
+            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization,
             IClassificationLoss loss = null,
             Action<LinearBinaryModelParameters> onFit = null)
         {
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index d3e0180dbd..9ee352c935 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -15993,11 +15993,12 @@
           }
         },
         {
-          "Name": "L2Weight",
+          "Name": "L2Regularization",
           "Type": "Float",
           "Desc": "L2 Regularization constant",
           "Aliases": [
-            "l2"
+            "l2",
+            "L2Weight"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -16015,13 +16016,14 @@
           }
         },
         {
-          "Name": "NumThreads",
+          "Name": "NumberOfThreads",
           "Type": "Int",
           "Desc": "Degree of lock-free parallelism. Defaults to automatic depending on data sparseness. Determinism not guaranteed.",
           "Aliases": [
             "nt",
             "t",
-            "threads"
+            "threads",
+            "NumThreads"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -16073,11 +16075,12 @@
           }
         },
         {
-          "Name": "MaxIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Maximum number of iterations; set to 1 to simulate online learning.",
           "Aliases": [
-            "iter"
+            "iter",
+            "MaxIterations"
           ],
           "Required": false,
           "SortOrder": 150.0,
@@ -16094,12 +16097,13 @@
           }
         },
         {
-          "Name": "InitLearningRate",
+          "Name": "InitialLearningRate",
           "Type": "Float",
           "Desc": "Initial learning rate (only used by SGD)",
           "Aliases": [
             "ilr",
-            "lr"
+            "lr",
+            "InitLearningRate"
           ],
           "Required": false,
           "SortOrder": 150.0,
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 85aa462334..39fa887e64 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -1010,7 +1010,7 @@ public void HogwildSGDLogisticRegression()
 
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, preds: catalog.Trainers.StochasticGradientDescentClassificationTrainer(r.label, r.features, null,
-                    new SgdBinaryTrainer.Options { L2Weight = 0, NumThreads = 1 },
+                    new SgdBinaryTrainer.Options { L2Regularization = 0, NumberOfThreads = 1 },
                     onFit: (p) => { pred = p; })));
 
             var pipe = reader.Append(est);
@@ -1083,7 +1083,7 @@ public void HogwildSGDSupportVectorMachine()
 
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, preds: catalog.Trainers.StochasticGradientDescentNonCalibratedClassificationTrainer(r.label, r.features, null,
-                    new SgdNonCalibratedBinaryTrainer.Options { L2Weight = 0, NumThreads = 1, Loss = new HingeLoss()},
+                    new SgdNonCalibratedBinaryTrainer.Options { L2Regularization = 0, NumberOfThreads = 1, Loss = new HingeLoss()},
                     onFit: (p) => { pred = p; })));
 
             var pipe = reader.Append(est);
diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
index 2db4dcb93c..ddf5641763 100644
--- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs
+++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
@@ -159,7 +159,7 @@ public void TestSDCABinary()
         public void TestSGDBinary()
         {
             TestFeatureContribution(ML.BinaryClassification.Trainers.StochasticGradientDescent(
-                new SgdBinaryTrainer.Options { NumThreads = 1}),
+                new SgdBinaryTrainer.Options { NumberOfThreads = 1}),
                 GetSparseDataset(TaskType.BinaryClassification, 100), "SGDBinary");
         }
 
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
index 28e7bc8a7f..c432957ec5 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
@@ -90,7 +90,7 @@ public void KMeansEstimator()
         public void TestEstimatorHogwildSGD()
         {
             var trainers = new[] { ML.BinaryClassification.Trainers.StochasticGradientDescent(l2Weight: 0, maxIterations: 80),
-                ML.BinaryClassification.Trainers.StochasticGradientDescent(new Trainers.SgdBinaryTrainer.Options(){ L2Weight = 0, MaxIterations = 80})};
+                ML.BinaryClassification.Trainers.StochasticGradientDescent(new Trainers.SgdBinaryTrainer.Options(){ L2Regularization = 0, NumberOfIterations = 80})};
 
             foreach (var trainer in trainers)
             {

From f40a6a4e1b75873e4fc3c54dbad77210d8c1c601 Mon Sep 17 00:00:00 2001
From: Abhishek Goswami <abgoswam@gmail.com>
Date: Sun, 3 Mar 2019 20:01:59 +0000
Subject: [PATCH 2/4] sdca binary

---
 .../SDCALogisticRegression.cs                 |  2 +-
 .../Standard/SdcaBinary.cs                    | 89 +++++++++++++------
 .../Standard/SdcaMultiClass.cs                |  6 +-
 .../Common/EntryPoints/core_manifest.json     | 45 ++++++----
 .../PredictionEngineBench.cs                  |  6 +-
 .../Evaluation.cs                             |  4 +-
 .../Training.cs                               |  8 +-
 .../FeatureContributionTests.cs               |  4 +-
 .../Estimators/DecomposableTrainAndPredict.cs |  2 +-
 .../Scenarios/Api/Estimators/Extensibility.cs |  2 +-
 .../Api/Estimators/IntrospectiveTraining.cs   |  2 +-
 .../Api/Estimators/Metacomponents.cs          |  2 +-
 .../Api/Estimators/MultithreadedPrediction.cs |  2 +-
 .../Api/Estimators/PredictAndMetadata.cs      |  2 +-
 .../Api/Estimators/SimpleTrainAndPredict.cs   |  2 +-
 .../Estimators/TrainSaveModelAndPredict.cs    |  2 +-
 .../Estimators/TrainWithInitialPredictor.cs   |  2 +-
 .../Scenarios/IrisPlantClassificationTests.cs |  2 +-
 ...PlantClassificationWithStringLabelTests.cs |  2 +-
 .../IrisPlantClassificationTests.cs           |  2 +-
 .../TrainerEstimators/MetalinearEstimators.cs |  8 +-
 21 files changed, 118 insertions(+), 78 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs
index e5a01cd4f7..1955fe5a05 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs
@@ -65,7 +65,7 @@ public static void Example()
                                         LabelColumnName = "Sentiment",
                                         FeatureColumnName = "Features",
                                         ConvergenceTolerance = 0.01f,  // The learning rate for adjusting bias from being regularized
-                                        NumThreads = 2, // Degree of lock-free parallelism 
+                                        NumberOfThreads = 2, // Degree of lock-free parallelism 
                                       }));
 
             // Run Cross-Validation on this second pipeline.
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
index 1d9d782076..f27e470dd7 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
@@ -159,38 +159,63 @@ public abstract class SdcaTrainerBase<TOptions, TTransformer, TModel> : Stochast
 
         public abstract class OptionsBase : TrainerInputBaseWithLabel
         {
-            [Argument(ArgumentType.AtMostOnce, HelpText = "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.", NullName = "<Auto>", ShortName = "l2", SortOrder = 1)]
+            /// <summary>
+            /// L2 regularization weight.
+            /// </summary>
+            [Argument(ArgumentType.AtMostOnce, HelpText = "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.", NullName = "<Auto>", ShortName = "l2, L2Const", SortOrder = 1)]
             [TGUI(Label = "L2 Regularizer Constant", SuggestedSweeps = "<Auto>,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2")]
             [TlcModule.SweepableDiscreteParam("L2Const", new object[] { "<Auto>", 1e-7f, 1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f })]
-            public float? L2Const;
+            public float? L2Regularization;
 
+            /// <summary>
+            /// L1 Soft Threshold.
+            /// </summary>
             // REVIEW: make the default positive when we know how to consume a sparse model
             [Argument(ArgumentType.AtMostOnce, HelpText = "L1 soft threshold (L1/L2). Note that it is easier to control and sweep using the threshold parameter than the raw L1-regularizer constant. By default the l1 threshold is automatically inferred based on data set.", NullName = "<Auto>", ShortName = "l1", SortOrder = 2)]
             [TGUI(Label = "L1 Soft Threshold", SuggestedSweeps = "<Auto>,0,0.25,0.5,0.75,1")]
             [TlcModule.SweepableDiscreteParam("L1Threshold", new object[] { "<Auto>", 0f, 0.25f, 0.5f, 0.75f, 1f })]
             public float? L1Threshold;
 
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.", NullName = "<Auto>", ShortName = "nt,t,threads", SortOrder = 50)]
+            /// <summary>
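+            /// The degree of lock-free parallelism (number of threads). Defaults to automatic when left unset; determinism is not guaranteed.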
+            /// </summary>
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.", NullName = "<Auto>", ShortName = "nt,t,threads, NumThreads", SortOrder = 50)]
             [TGUI(Label = "Number of threads", SuggestedSweeps = "<Auto>,1,2,4")]
-            public int? NumThreads;
+            public int? NumberOfThreads;
 
+            /// <summary>
+            /// The tolerance for the ratio between duality gap and primal loss for convergence checking.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The tolerance for the ratio between duality gap and primal loss for convergence checking.", ShortName = "tol")]
             [TGUI(SuggestedSweeps = "0.001, 0.01, 0.1, 0.2")]
             [TlcModule.SweepableDiscreteParam("ConvergenceTolerance", new object[] { 0.001f, 0.01f, 0.1f, 0.2f })]
             public float ConvergenceTolerance = 0.1f;
 
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.", NullName = "<Auto>", ShortName = "iter")]
+            /// <summary>
+            /// The maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic when left unset.
+            /// </summary>
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.", NullName = "<Auto>", ShortName = "iter, MaxIterations")]
             [TGUI(Label = "Max number of iterations", SuggestedSweeps = "<Auto>,10,20,100")]
             [TlcModule.SweepableDiscreteParam("MaxIterations", new object[] { "<Auto>", 10, 20, 100 })]
-            public int? MaxIterations;
+            public int? NumberOfIterations;
 
+            /// <summary>
+            /// Whether to shuffle data at every epoch (default true).
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Shuffle data every epoch?", ShortName = "shuf")]
             [TlcModule.SweepableDiscreteParam("Shuffle", null, isBool: true)]
             public bool Shuffle = true;
 
+            /// <summary>
+            /// Convergence check frequency (in terms of <see cref="NumberOfIterations"/>). Set as negative or zero for not checking at all.
+            /// If left blank, it defaults to check after every <see cref="NumberOfThreads"/> iterations.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.", NullName = "<Auto>", ShortName = "checkFreq")]
             public int? CheckFrequency;
 
+            /// <summary>
+            /// The learning rate for adjusting bias from being regularized.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "The learning rate for adjusting bias from being regularized.", ShortName = "blr")]
             [TGUI(SuggestedSweeps = "0, 0.01, 0.1, 1")]
             [TlcModule.SweepableDiscreteParam("BiasLearningRate", new object[] { 0.0f, 0.01f, 0.1f, 1f })]
@@ -199,19 +224,19 @@ public abstract class OptionsBase : TrainerInputBaseWithLabel
             internal virtual void Check(IHostEnvironment env)
             {
                 Contracts.AssertValue(env);
-                env.CheckUserArg(L2Const == null || L2Const >= 0, nameof(L2Const), "L2 constant must be non-negative.");
+                env.CheckUserArg(L2Regularization == null || L2Regularization >= 0, nameof(L2Regularization), "L2 constant must be non-negative.");
                 env.CheckUserArg(L1Threshold == null || L1Threshold >= 0, nameof(L1Threshold), "L1 threshold must be non-negative.");
-                env.CheckUserArg(MaxIterations == null || MaxIterations > 0, nameof(MaxIterations), "Max number of iterations must be positive.");
+                env.CheckUserArg(NumberOfIterations == null || NumberOfIterations > 0, nameof(NumberOfIterations), "Max number of iterations must be positive.");
                 env.CheckUserArg(ConvergenceTolerance > 0 && ConvergenceTolerance <= 1, nameof(ConvergenceTolerance), "Convergence tolerance must be positive and no larger than 1.");
 
-                if (L2Const < L2LowerBound)
+                if (L2Regularization < L2LowerBound)
                 {
                     using (var ch = env.Start("SDCA arguments checking"))
                     {
                         ch.Warning($"The L2 regularization constant must be at least {L2LowerBound}. In SDCA, the dual formulation " +
                             $"is only valid with a positive constant, and values below {L2LowerBound} cause very slow convergence. " +
-                            $"The original {nameof(L2Const)} = {L2Const}, was replaced with {nameof(L2Const)} = {L2LowerBound}.");
-                        L2Const = L2LowerBound;
+                            $"The original {nameof(L2Regularization)} = {L2Regularization}, was replaced with {nameof(L2Regularization)} = {L2LowerBound}.");
+                        L2Regularization = L2LowerBound;
                     }
                 }
             }
@@ -267,9 +292,9 @@ internal SdcaTrainerBase(IHostEnvironment env, TOptions options, SchemaShape.Col
             : base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), TrainerUtils.MakeR4VecFeature(options.FeatureColumnName), label, weight)
         {
             SdcaTrainerOptions = options;
-            SdcaTrainerOptions.L2Const = l2Const ?? options.L2Const;
+            SdcaTrainerOptions.L2Regularization = l2Const ?? options.L2Regularization;
             SdcaTrainerOptions.L1Threshold = l1Threshold ?? options.L1Threshold;
-            SdcaTrainerOptions.MaxIterations = maxIterations ?? options.MaxIterations;
+            SdcaTrainerOptions.NumberOfIterations = maxIterations ?? options.NumberOfIterations;
             SdcaTrainerOptions.Check(env);
         }
 
@@ -292,10 +317,10 @@ private protected sealed override TModel TrainCore(IChannel ch, RoleMappedData d
 
             var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt);
             int numThreads;
-            if (SdcaTrainerOptions.NumThreads.HasValue)
+            if (SdcaTrainerOptions.NumberOfThreads.HasValue)
             {
-                numThreads = SdcaTrainerOptions.NumThreads.Value;
-                Host.CheckUserArg(numThreads > 0, nameof(OptionsBase.NumThreads), "The number of threads must be either null or a positive integer.");
+                numThreads = SdcaTrainerOptions.NumberOfThreads.Value;
+                Host.CheckUserArg(numThreads > 0, nameof(OptionsBase.NumberOfThreads), "The number of threads must be either null or a positive integer.");
                 if (0 < Host.ConcurrencyFactor && Host.ConcurrencyFactor < numThreads)
                 {
                     numThreads = Host.ConcurrencyFactor;
@@ -414,14 +439,14 @@ private protected sealed override TModel TrainCore(IChannel ch, RoleMappedData d
 
             ch.Check(count > 0, "Training set has 0 instances, aborting training.");
             // Tune the default hyperparameters based on dataset size.
-            if (SdcaTrainerOptions.MaxIterations == null)
-                SdcaTrainerOptions.MaxIterations = TuneDefaultMaxIterations(ch, count, numThreads);
+            if (SdcaTrainerOptions.NumberOfIterations == null)
+                SdcaTrainerOptions.NumberOfIterations = TuneDefaultMaxIterations(ch, count, numThreads);
 
-            Contracts.Assert(SdcaTrainerOptions.MaxIterations.HasValue);
-            if (SdcaTrainerOptions.L2Const == null)
-                SdcaTrainerOptions.L2Const = TuneDefaultL2(ch, SdcaTrainerOptions.MaxIterations.Value, count, numThreads);
+            Contracts.Assert(SdcaTrainerOptions.NumberOfIterations.HasValue);
+            if (SdcaTrainerOptions.L2Regularization == null)
+                SdcaTrainerOptions.L2Regularization = TuneDefaultL2(ch, SdcaTrainerOptions.NumberOfIterations.Value, count, numThreads);
 
-            Contracts.Assert(SdcaTrainerOptions.L2Const.HasValue);
+            Contracts.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
             if (SdcaTrainerOptions.L1Threshold == null)
                 SdcaTrainerOptions.L1Threshold = TuneDefaultL1(ch, numFeatures);
 
@@ -455,8 +480,8 @@ private protected sealed override TModel TrainCore(IChannel ch, RoleMappedData d
 
             int bestIter = 0;
             var bestPrimalLoss = double.PositiveInfinity;
-            ch.Assert(SdcaTrainerOptions.L2Const.HasValue);
-            var l2Const = SdcaTrainerOptions.L2Const.Value;
+            ch.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
+            var l2Const = SdcaTrainerOptions.L2Regularization.Value;
             float lambdaNInv = 1 / (l2Const * count);
 
             DualsTableBase duals = null;
@@ -519,8 +544,8 @@ private protected sealed override TModel TrainCore(IChannel ch, RoleMappedData d
             ch.AssertValue(metricNames);
             ch.AssertValue(metrics);
             ch.Assert(metricNames.Length == metrics.Length);
-            ch.Assert(SdcaTrainerOptions.MaxIterations.HasValue);
-            var maxIterations = SdcaTrainerOptions.MaxIterations.Value;
+            ch.Assert(SdcaTrainerOptions.NumberOfIterations.HasValue);
+            var maxIterations = SdcaTrainerOptions.NumberOfIterations.Value;
 
             var rands = new Random[maxIterations];
             for (int i = 0; i < maxIterations; i++)
@@ -762,7 +787,7 @@ private protected virtual void TrainWithoutLock(IProgressChannelProvider progres
             int maxUpdateTrials = 2 * numThreads;
             var l1Threshold = SdcaTrainerOptions.L1Threshold.Value;
             bool l1ThresholdZero = l1Threshold == 0;
-            var lr = SdcaTrainerOptions.BiasLearningRate * SdcaTrainerOptions.L2Const.Value;
+            var lr = SdcaTrainerOptions.BiasLearningRate * SdcaTrainerOptions.L2Regularization.Value;
             var pch = progress != null ? progress.StartProgressChannel("Dual update") : null;
             using (pch)
             using (var cursor = SdcaTrainerOptions.Shuffle ? cursorFactory.Create(rand) : cursorFactory.Create())
@@ -950,9 +975,9 @@ private protected virtual bool CheckConvergence(
                 Host.Assert(idToIdx == null || row == duals.Length);
             }
 
-            Contracts.Assert(SdcaTrainerOptions.L2Const.HasValue);
+            Contracts.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
             Contracts.Assert(SdcaTrainerOptions.L1Threshold.HasValue);
-            Double l2Const = SdcaTrainerOptions.L2Const.Value;
+            Double l2Const = SdcaTrainerOptions.L2Regularization.Value;
             Double l1Threshold = SdcaTrainerOptions.L1Threshold.Value;
             Double l1Regularizer = l1Threshold * l2Const * (VectorUtils.L1Norm(in weights[0]) + Math.Abs(biasReg[0]));
             var l2Regularizer = l2Const * (VectorUtils.NormSquared(weights[0]) + biasReg[0] * biasReg[0]) * 0.5;
@@ -1421,6 +1446,9 @@ public abstract class SdcaBinaryTrainerBase<TModelParameters> :
 
         public class BinaryOptionsBase : OptionsBase
         {
+            /// <summary>
+            /// Weight applied to the examples in the positive class.
+            /// </summary>
             [Argument(ArgumentType.AtMostOnce, HelpText = "Apply weight to the positive class, for imbalanced data", ShortName = "piw")]
             public float PositiveInstanceWeight = 1;
 
@@ -1584,6 +1612,9 @@ public sealed class SdcaNonCalibratedBinaryTrainer : SdcaBinaryTrainerBase<Linea
         /// </summary>
         public sealed class Options : BinaryOptionsBase
         {
+            /// <summary>
+            /// The loss function to use. Default is <see cref="LogLoss"/>.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportSdcaClassificationLossFactory LossFunction = new LogLossFactory();
         }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
index e1c9b195e5..06bd5dbfee 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
@@ -131,7 +131,7 @@ private protected override void TrainWithoutLock(IProgressChannelProvider progre
             int maxUpdateTrials = 2 * numThreads;
             var l1Threshold = SdcaTrainerOptions.L1Threshold.Value;
             bool l1ThresholdZero = l1Threshold == 0;
-            var lr = SdcaTrainerOptions.BiasLearningRate * SdcaTrainerOptions.L2Const.Value;
+            var lr = SdcaTrainerOptions.BiasLearningRate * SdcaTrainerOptions.L2Regularization.Value;
 
             var pch = progress != null ? progress.StartProgressChannel("Dual update") : null;
             using (pch)
@@ -350,9 +350,9 @@ private protected override bool CheckConvergence(
                 Host.Assert(idToIdx == null || row * numClasses == duals.Length);
             }
 
-            Contracts.Assert(SdcaTrainerOptions.L2Const.HasValue);
+            Contracts.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
             Contracts.Assert(SdcaTrainerOptions.L1Threshold.HasValue);
-            Double l2Const = SdcaTrainerOptions.L2Const.Value;
+            Double l2Const = SdcaTrainerOptions.L2Regularization.Value;
             Double l1Threshold = SdcaTrainerOptions.L1Threshold.Value;
 
             Double weightsL1Norm = 0;
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 9ee352c935..9502385a5b 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -15048,11 +15048,12 @@
       "ShortName": "SDCA",
       "Inputs": [
         {
-          "Name": "L2Const",
+          "Name": "L2Regularization",
           "Type": "Float",
           "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
           "Aliases": [
-            "l2"
+            "l2",
+            "L2Const"
           ],
           "Required": false,
           "SortOrder": 1.0,
@@ -15186,13 +15187,14 @@
           }
         },
         {
-          "Name": "NumThreads",
+          "Name": "NumberOfThreads",
           "Type": "Int",
           "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
           "Aliases": [
             "nt",
             "t",
-            "threads"
+            "threads",
+            "NumThreads"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -15256,11 +15258,12 @@
           }
         },
         {
-          "Name": "MaxIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
           "Aliases": [
-            "iter"
+            "iter",
+            "MaxIterations"
           ],
           "Required": false,
           "SortOrder": 150.0,
@@ -15352,11 +15355,12 @@
       "ShortName": "sasdcamc",
       "Inputs": [
         {
-          "Name": "L2Const",
+          "Name": "L2Regularization",
           "Type": "Float",
           "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
           "Aliases": [
-            "l2"
+            "l2",
+            "L2Const"
           ],
           "Required": false,
           "SortOrder": 1.0,
@@ -15490,13 +15494,14 @@
           }
         },
         {
-          "Name": "NumThreads",
+          "Name": "NumberOfThreads",
           "Type": "Int",
           "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
           "Aliases": [
             "nt",
             "t",
-            "threads"
+            "threads",
+            "NumThreads"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -15525,11 +15530,12 @@
           }
         },
         {
-          "Name": "MaxIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
           "Aliases": [
-            "iter"
+            "iter",
+            "MaxIterations"
           ],
           "Required": false,
           "SortOrder": 150.0,
@@ -15621,11 +15627,12 @@
       "ShortName": "sasdcar",
       "Inputs": [
         {
-          "Name": "L2Const",
+          "Name": "L2Regularization",
           "Type": "Float",
           "Desc": "L2 regularizer constant. By default the l2 constant is automatically inferred based on data set.",
           "Aliases": [
-            "l2"
+            "l2",
+            "L2Const"
           ],
           "Required": false,
           "SortOrder": 1.0,
@@ -15759,13 +15766,14 @@
           }
         },
         {
-          "Name": "NumThreads",
+          "Name": "NumberOfThreads",
           "Type": "Int",
           "Desc": "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed.",
           "Aliases": [
             "nt",
             "t",
-            "threads"
+            "threads",
+            "NumThreads"
           ],
           "Required": false,
           "SortOrder": 50.0,
@@ -15794,11 +15802,12 @@
           }
         },
         {
-          "Name": "MaxIterations",
+          "Name": "NumberOfIterations",
           "Type": "Int",
           "Desc": "Maximum number of iterations; set to 1 to simulate online learning. Defaults to automatic.",
           "Aliases": [
-            "iter"
+            "iter",
+            "MaxIterations"
           ],
           "Required": false,
           "SortOrder": 150.0,
diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
index f77e9671a6..82e23b1447 100644
--- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
+++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
@@ -59,7 +59,7 @@ public void SetupIrisPipeline()
 
             var pipeline = new ColumnConcatenatingEstimator(env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" })
                 .Append(env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options {NumThreads = 1, ConvergenceTolerance = 1e-2f, }));
+                    new SdcaMultiClassTrainer.Options {NumberOfThreads = 1, ConvergenceTolerance = 1e-2f, }));
 
             var model = pipeline.Fit(data);
 
@@ -94,7 +94,7 @@ public void SetupSentimentPipeline()
 
             var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options {NumThreads = 1, ConvergenceTolerance = 1e-2f, }));
+                    new SdcaNonCalibratedBinaryTrainer.Options {NumberOfThreads = 1, ConvergenceTolerance = 1e-2f, }));
 
             var model = pipeline.Fit(data);
 
@@ -128,7 +128,7 @@ public void SetupBreastCancerPipeline()
             IDataView data = loader.Load(_breastCancerDataPath);
 
             var pipeline = env.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1, ConvergenceTolerance = 1e-2f, });
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1, ConvergenceTolerance = 1e-2f, });
 
             var model = pipeline.Fit(data);
 
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index fd16635d6e..af641d2733 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -65,7 +65,7 @@ public void TrainAndEvaluateBinaryClassification()
             var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .AppendCacheCheckpoint(mlContext)
                 .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train the model.
             var model = pipeline.Fit(data);
@@ -151,7 +151,7 @@ public void TrainAndEvaluateMulticlassClassification()
             var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
                 .AppendCacheCheckpoint(mlContext)
                 .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { NumThreads = 1}));
+                    new SdcaMultiClassTrainer.Options { NumberOfThreads = 1}));
 
             // Train the model.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 39fa887e64..6f8c524069 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -45,7 +45,7 @@ public void SdcaRegression()
 
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, score: catalog.Trainers.Sdca(r.label, r.features, null,
-                new SdcaRegressionTrainer.Options() { MaxIterations = 2, NumThreads = 1 },
+                new SdcaRegressionTrainer.Options() { NumberOfIterations = 2, NumberOfThreads = 1 },
                 onFit: p => pred = p)));
 
             var pipe = reader.Append(est);
@@ -87,7 +87,7 @@ public void SdcaRegressionNameCollision()
 
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, r.Score, score: catalog.Trainers.Sdca(r.label, r.features, null,
-                new SdcaRegressionTrainer.Options() { MaxIterations = 2, NumThreads = 1 })));
+                new SdcaRegressionTrainer.Options() { NumberOfIterations = 2, NumberOfThreads = 1 })));
 
             var pipe = reader.Append(est);
 
@@ -118,7 +118,7 @@ public void SdcaBinaryClassification()
 
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, preds: catalog.Trainers.Sdca(r.label, r.features, null,
-                    new SdcaBinaryTrainer.Options { MaxIterations = 2, NumThreads = 1 },
+                    new SdcaBinaryTrainer.Options { NumberOfIterations = 2, NumberOfThreads = 1 },
                     onFit: (p) => { pred = p; })));
 
             var pipe = reader.Append(est);
@@ -198,7 +198,7 @@ public void SdcaBinaryClassificationNoCalibration()
             // With a custom loss function we no longer get calibrated predictions.
             var est = reader.MakeNewEstimator()
                 .Append(r => (r.label, preds: catalog.Trainers.SdcaNonCalibrated(r.label, r.features, null, loss,
-                new SdcaNonCalibratedBinaryTrainer.Options { MaxIterations = 2, NumThreads = 1 },
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfIterations = 2, NumberOfThreads = 1 },
                 onFit: p => pred = p)));
 
             var pipe = reader.Append(est);
diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
index ddf5641763..19c9090461 100644
--- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs
+++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs
@@ -76,7 +76,7 @@ public void TestFastTreeTweedieRegression()
         public void TestSDCARegression()
         {
             TestFeatureContribution(ML.Regression.Trainers.StochasticDualCoordinateAscent(
-                new SdcaRegressionTrainer.Options { NumThreads = 1, }), GetSparseDataset(numberOfInstances: 100), "SDCARegression");
+                new SdcaRegressionTrainer.Options { NumberOfThreads = 1, }), GetSparseDataset(numberOfInstances: 100), "SDCARegression");
         }
 
         [Fact]
@@ -152,7 +152,7 @@ public void TestLightGbmBinary()
         public void TestSDCABinary()
         {
             TestFeatureContribution(ML.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1, }), GetSparseDataset(TaskType.BinaryClassification, 100), "SDCABinary", precision: 5);
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1, }), GetSparseDataset(TaskType.BinaryClassification, 100), "SDCABinary", precision: 5);
         }
 
         [Fact]
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
index 28886b70d6..154a9f36b1 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
@@ -32,7 +32,7 @@ void DecomposableTrainAndPredict()
             var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                 .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                 .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1, }))
+                    new SdcaMultiClassTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1, }))
                 .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));
 
             var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
index 47e1fc4034..f96665d08a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
@@ -41,7 +41,7 @@ void Extensibility()
                 .Append(new CustomMappingEstimator<IrisData, IrisData>(ml, action, null), TransformerScope.TrainTest)
                 .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                 .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1 }))
+                    new SdcaMultiClassTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1 }))
                 .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));
 
             var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
index e82545cd34..488ba2ae7a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
@@ -36,7 +36,7 @@ public void IntrospectiveTraining()
             var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .AppendCacheCheckpoint(ml)
                 .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
index a468c57704..63264e6b85 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
@@ -24,7 +24,7 @@ public void Metacomponents()
             var data = ml.Data.LoadFromTextFile<IrisData>(GetDataPath(TestDatasets.irisData.trainFilename), separatorChar: ',');
 
             var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1, });
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1, });
 
             var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                 .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
index 305ec4dca3..d7913df28a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
@@ -31,7 +31,7 @@ void MultithreadedPrediction()
             var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .AppendCacheCheckpoint(ml)
                 .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs
index f4a088a8fc..6937df3644 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs
@@ -30,7 +30,7 @@ void PredictAndMetadata()
             var pipeline = ml.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                 .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
                 .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1, }));
+                    new SdcaMultiClassTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1, }));
 
             var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
             var engine = model.CreatePredictionEngine<IrisDataNoLabel, IrisPredictionNotCasted>(ml);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
index a6366365a0..67d2eabf0f 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
@@ -29,7 +29,7 @@ public void SimpleTrainAndPredict()
             var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .AppendCacheCheckpoint(ml)
                 .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
index e8607dd284..fd2146d1ad 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
@@ -29,7 +29,7 @@ public void TrainSaveModelAndPredict()
             var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
                 .AppendCacheCheckpoint(ml)
                 .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
index 8bec24c96a..85578928ed 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
@@ -32,7 +32,7 @@ public void TrainWithInitialPredictor()
 
             // Train the first predictor.
             var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 });
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 });
 
             var firstModel = trainer.Fit(trainData);
 
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
index 10d9fea1ef..cb6aef7c33 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs
@@ -32,7 +32,7 @@ public void TrainAndPredictIrisModelTest()
                             .Append(mlContext.Transforms.Normalize("Features"))
                             .AppendCacheCheckpoint(mlContext)
                             .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                                new SdcaMultiClassTrainer.Options { NumThreads = 1 }));
+                                new SdcaMultiClassTrainer.Options { NumberOfThreads = 1 }));
 
             // Read training and test data sets
             string dataPath = GetDataPath(TestDatasets.iris.trainFilename);
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
index 8f7289dd09..75c4569729 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
@@ -38,7 +38,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest()
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Label", "IrisPlantType"), TransformerScope.TrainTest)
                 .AppendCacheCheckpoint(mlContext)
                 .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { NumThreads = 1 }))
+                    new SdcaMultiClassTrainer.Options { NumberOfThreads = 1 }))
                 .Append(mlContext.Transforms.Conversion.MapKeyToValue(("Plant", "PredictedLabel")));
 
             // Train the pipeline
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
index e3a2216789..988334a67f 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
@@ -30,7 +30,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
                 .Append(mlContext.Transforms.Normalize("Features"))
                 .AppendCacheCheckpoint(mlContext)
                 .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaMultiClassTrainer.Options { NumThreads = 1 }));
+                    new SdcaMultiClassTrainer.Options { NumberOfThreads = 1 }));
 
             // Read training and test data sets
             string dataPath = GetDataPath(TestDatasets.iris.trainFilename);
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs
index ce05c26aca..614ae4397c 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs
@@ -42,7 +42,7 @@ public void OVAUncalibrated()
         {
             var (pipeline, data) = GetMultiClassPipeline();
             var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1 });
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1 });
 
             pipeline = pipeline.Append(ML.MulticlassClassification.Trainers.OneVersusAll(sdcaTrainer, useProbabilities: false))
                     .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));
@@ -60,7 +60,7 @@ public void PairwiseCouplingTrainer()
             var (pipeline, data) = GetMultiClassPipeline();
 
             var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                new SdcaNonCalibratedBinaryTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1 });
+                new SdcaNonCalibratedBinaryTrainer.Options { NumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1 });
 
             pipeline = pipeline.Append(ML.MulticlassClassification.Trainers.PairwiseCoupling(sdcaTrainer))
                     .Append(ML.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
@@ -86,9 +86,9 @@ public void MetacomponentsFeaturesRenamed()
                 new SdcaNonCalibratedBinaryTrainer.Options {
                     LabelColumnName = "Label",
                     FeatureColumnName = "Vars",
-                    MaxIterations = 100,
+                    NumberOfIterations = 100,
                     Shuffle = true,
-                    NumThreads = 1, });
+                    NumberOfThreads = 1, });
 
             var pipeline = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                 .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)

From 72656f2bc28e5293251c0455aa38ebb99a0f5217 Mon Sep 17 00:00:00 2001
From: Abhishek Goswami <abgoswam@gmail.com>
Date: Sun, 3 Mar 2019 20:26:44 +0000
Subject: [PATCH 3/4] SDCA regression and multiclass

---
 .../Standard/LinearModelParameters.cs                        | 3 +++
 .../LogisticRegression/MulticlassLogisticRegression.cs       | 3 +++
 src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs | 5 ++++-
 src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs | 3 +++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
index 2fa9718519..946f5e5150 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
@@ -580,6 +580,9 @@ private protected override void SaveAsIni(TextWriter writer, RoleMappedSchema sc
         }
     }
 
+    /// <summary>
+    /// The model parameters class for linear regression.
+    /// </summary>
     public sealed class LinearRegressionModelParameters : RegressionModelParameters,
         IParameterMixer<float>,
         ICanGetSummaryInKeyValuePairs
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
index 4d4469c10c..abc0722b71 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs
@@ -333,6 +333,9 @@ public MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParamete
             => TrainTransformer(trainData, initPredictor: modelParameters);
     }
 
+    /// <summary>
+    /// The model parameters class for multiclass logistic regression.
+    /// </summary>
     public sealed class MulticlassLogisticRegressionModelParameters :
         ModelParametersBase<VBuffer<float>>,
         IValueMapper,
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
index 06bd5dbfee..64f3db6c89 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs
@@ -26,7 +26,7 @@ namespace Microsoft.ML.Trainers
 {
     // SDCA linear multiclass trainer.
     /// <include file='doc.xml' path='doc/members/member[@name="SDCA"]/*' />
-    public class SdcaMultiClassTrainer : SdcaTrainerBase<SdcaMultiClassTrainer.Options, MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>, MulticlassLogisticRegressionModelParameters>
+    public sealed class SdcaMultiClassTrainer : SdcaTrainerBase<SdcaMultiClassTrainer.Options, MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>, MulticlassLogisticRegressionModelParameters>
     {
         internal const string LoadNameValue = "SDCAMC";
         internal const string UserNameValue = "Fast Linear Multi-class Classification (SA-SDCA)";
@@ -35,6 +35,9 @@ public class SdcaMultiClassTrainer : SdcaTrainerBase<SdcaMultiClassTrainer.Optio
 
         public sealed class Options : OptionsBase
         {
+            /// <summary>
+            /// The loss function to use. Default is <see cref="LogLoss"/>.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportSdcaClassificationLossFactory LossFunction = new LogLossFactory();
         }
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
index 3cb1a42dae..c51d97e610 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs
@@ -31,6 +31,9 @@ public sealed class SdcaRegressionTrainer : SdcaTrainerBase<SdcaRegressionTraine
 
         public sealed class Options : OptionsBase
         {
+            /// <summary>
+            /// The loss function to use. Default is <see cref="SquaredLoss"/>.
+            /// </summary>
             [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)]
             public ISupportSdcaRegressionLossFactory LossFunction = new SquaredLossFactory();
 

From 888a65426825acf0db401d7d3ca72d746b6dcba8 Mon Sep 17 00:00:00 2001
From: Abhishek Goswami <abgoswam@gmail.com>
Date: Wed, 6 Mar 2019 03:12:18 +0000
Subject: [PATCH 4/4] review comments

---
 .../StochasticDualCoordinateAscent.cs         |  2 +-
 ...hasticDualCoordinateAscentNonCalibrated.cs |  2 +-
 .../Static/SDCABinaryClassification.cs        |  2 +-
 .../Static/SDCARegression.cs                  |  2 +-
 .../Standard/SdcaBinary.cs                    |  8 +--
 .../StandardLearnersCatalog.cs                | 68 +++++++++----------
 .../SdcaStaticExtensions.cs                   | 56 +++++++--------
 src/Microsoft.ML.StaticPipe/SgdStatic.cs      | 28 ++++----
 .../Common/EntryPoints/core_manifest.json     | 15 ++--
 .../UnitTests/TestEntryPoints.cs              |  8 +--
 .../DataTransformation.cs                     |  2 +-
 .../Training.cs                               |  4 +-
 .../TrainerEstimators/SdcaTests.cs            |  4 +-
 .../TrainerEstimators/TrainerEstimators.cs    |  2 +-
 14 files changed, 103 insertions(+), 100 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs
index 2c2106ccc4..74d8f8445c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs
@@ -49,7 +49,7 @@ public static void Example()
             // the "Features" column produced by FeaturizeText as the features column.
             var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
                     .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
-                    .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Sentiment", featureColumnName: "Features", l2Const: 0.001f));
+                    .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Sentiment", featureColumnName: "Features", l2Regularization: 0.001f));
 
             // Step 3: Run Cross-Validation on this pipeline.
             var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs
index 9a7892fdb9..814c66a54a 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs
@@ -41,7 +41,7 @@ public static void Example()
             // Step 2: Create a binary classifier. This trainer may produce a logistic regression model.
             // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
             var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Const: 0.001f);
+                labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Regularization: 0.001f);
 
             // Step 3: Train the pipeline created.
             var model = pipeline.Fit(data);
diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs
index 61f881ec4d..8150b6400b 100644
--- a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs
+++ b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs
@@ -77,7 +77,7 @@ public static void SdcaBinaryClassification()
                             row.Label,
                             row.Features,
                             l1Threshold: 0.25f,
-                            maxIterations: 100)))
+                            numberOfIterations: 100)))
                 .Append(row => (
                     Label: row.Label,
                     Score: row.Score,
diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs
index 09da261fb6..602a3a5473 100644
--- a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs
+++ b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs
@@ -37,7 +37,7 @@ public static void SdcaRegression()
                                             r.label,
                                             r.features,
                                             l1Threshold: 0f,
-                                            maxIterations: 100,
+                                            numberOfIterations: 100,
                                         onFit: p => pred = p)
                                 )
                         );
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
index 6aab39016e..80a53d6a42 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
@@ -225,8 +225,8 @@ public abstract class OptionsBase : TrainerInputBaseWithLabel
             /// <value>
             /// Set to zero or negative value to disable checking. If <see langword="null"/>, it defaults to <see cref="NumberOfThreads"/>."
             /// </value>
-            [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.", NullName = "<Auto>", ShortName = "checkFreq")]
-            public int? CheckFrequency;
+            [Argument(ArgumentType.AtMostOnce, HelpText = "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.", NullName = "<Auto>", ShortName = "checkFreq, CheckFrequency")]
+            public int? ConvergenceCheckFrequency;
 
             /// <summary>
             /// The learning rate for adjusting bias from being regularized.
@@ -353,8 +353,8 @@ private protected sealed override TModel TrainCore(IChannel ch, RoleMappedData d
                 ch.Info("Using {0} threads to train.", numThreads);
 
             int checkFrequency = 0;
-            if (SdcaTrainerOptions.CheckFrequency.HasValue)
-                checkFrequency = SdcaTrainerOptions.CheckFrequency.Value;
+            if (SdcaTrainerOptions.ConvergenceCheckFrequency.HasValue)
+                checkFrequency = SdcaTrainerOptions.ConvergenceCheckFrequency.Value;
             else
             {
                 checkFrequency = numThreads;
diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
index 1f05261e8e..f32c45ff85 100644
--- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
+++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -22,9 +22,9 @@ public static class StandardLearnersCatalog
         /// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
         /// <param name="featureColumnName">The features, or independent variables.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
-        /// <param name="maxIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
-        /// <param name="l2Weight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
+        /// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
+        /// <param name="initialLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
+        /// <param name="l2Regularization">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -36,14 +36,14 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati
             string labelColumnName = DefaultColumnNames.Label,
             string featureColumnName = DefaultColumnNames.Features,
             string exampleWeightColumnName = null,
-            int maxIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
-            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
-            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Regularization)
+            int numberOfIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initialLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Regularization = SgdBinaryTrainer.Options.Defaults.L2Regularization)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
             return new SgdBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName,
-                maxIterations, initLearningRate, l2Weight);
+                numberOfIterations, initialLearningRate, l2Regularization);
         }
 
         /// <summary>
@@ -76,9 +76,9 @@ public static SgdBinaryTrainer StochasticGradientDescent(this BinaryClassificati
         /// <param name="featureColumnName">The features, or independent variables.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="loss">The loss function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer.</param>
-        /// <param name="maxIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
-        /// <param name="l2Weight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
+        /// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
+        /// <param name="initialLearningRate">The initial <a href="tmpurl_lr">learning rate</a> used by SGD.</param>
+        /// <param name="l2Regularization">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -91,14 +91,14 @@ public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrat
             string featureColumnName = DefaultColumnNames.Features,
             string exampleWeightColumnName = null,
             IClassificationLoss loss = null,
-            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
-            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
-            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization)
+            int numberOfIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initialLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Regularization = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
             return new SgdNonCalibratedBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName,
-                maxIterations, initLearningRate, l2Weight, loss);
+                numberOfIterations, initialLearningRate, l2Regularization, loss);
         }
 
         /// <summary>
@@ -130,9 +130,9 @@ public static SgdNonCalibratedBinaryTrainer StochasticGradientDescentNonCalibrat
         /// <param name="labelColumnName">The name of the label column.</param>
         /// <param name="featureColumnName">The name of the feature column.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
-        /// <param name="l2Const">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
         /// <param name="l1Threshold">The L1 <a href='tmpurl_regularization'>regularization</a> hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <param name="loss">The custom <a href="tmpurl_loss">loss</a>, if unspecified will be <see cref="SquaredLoss"/>.</param>
         /// <example>
         /// <format type="text/markdown">
@@ -145,13 +145,13 @@ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this Regressi
             string featureColumnName = DefaultColumnNames.Features,
             string exampleWeightColumnName = null,
             ISupportSdcaRegressionLoss loss = null,
-            float? l2Const = null,
+            float? l2Regularization = null,
             float? l1Threshold = null,
-            int? maxIterations = null)
+            int? numberOfIterations = null)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
-            return new SdcaRegressionTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Const, l1Threshold, maxIterations);
+            return new SdcaRegressionTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Regularization, l1Threshold, numberOfIterations);
         }
 
         /// <summary>
@@ -182,9 +182,9 @@ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this Regressi
         /// <param name="labelColumnName">The name of the label column.</param>
         /// <param name="featureColumnName">The name of the feature column.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
-        /// <param name="l2Const">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
         /// <param name="l1Threshold">The L1 <a href='tmpurl_regularization'>regularization</a> hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -196,13 +196,13 @@ public static SdcaBinaryTrainer StochasticDualCoordinateAscent(
                 string labelColumnName = DefaultColumnNames.Label,
                 string featureColumnName = DefaultColumnNames.Features,
                 string exampleWeightColumnName = null,
-                float? l2Const = null,
+                float? l2Regularization = null,
                 float? l1Threshold = null,
-                int? maxIterations = null)
+                int? numberOfIterations = null)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
-            return new SdcaBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, l2Const, l1Threshold, maxIterations);
+            return new SdcaBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, l2Regularization, l1Threshold, numberOfIterations);
         }
 
         /// <summary>
@@ -235,9 +235,9 @@ public static SdcaBinaryTrainer StochasticDualCoordinateAscent(
         /// <param name="featureColumnName">The name of the feature column.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="loss">The custom <a href="tmpurl_loss">loss</a>. Defaults to <see cref="LogLoss"/> if not specified.</param>
-        /// <param name="l2Const">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
         /// <param name="l1Threshold">The L1 <a href='tmpurl_regularization'>regularization</a> hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -250,13 +250,13 @@ public static SdcaNonCalibratedBinaryTrainer StochasticDualCoordinateAscentNonCa
                 string featureColumnName = DefaultColumnNames.Features,
                 string exampleWeightColumnName = null,
                 ISupportSdcaClassificationLoss loss = null,
-                float? l2Const = null,
+                float? l2Regularization = null,
                 float? l1Threshold = null,
-                int? maxIterations = null)
+                int? numberOfIterations = null)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
-            return new SdcaNonCalibratedBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Const, l1Threshold, maxIterations);
+            return new SdcaNonCalibratedBinaryTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Regularization, l1Threshold, numberOfIterations);
         }
 
         /// <summary>
@@ -283,9 +283,9 @@ public static SdcaNonCalibratedBinaryTrainer StochasticDualCoordinateAscentNonCa
         /// <param name="featureColumnName">The name of the feature column.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="loss">The custom <a href="tmpurl_loss">loss</a>. Defaults to <see cref="LogLoss"/> if not specified.</param>
-        /// <param name="l2Const">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.</param>
         /// <param name="l1Threshold">The L1 <a href='tmpurl_regularization'>regularization</a> hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
@@ -297,13 +297,13 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla
                     string featureColumnName = DefaultColumnNames.Features,
                     string exampleWeightColumnName = null,
                     ISupportSdcaClassificationLoss loss = null,
-                    float? l2Const = null,
+                    float? l2Regularization = null,
                     float? l1Threshold = null,
-                    int? maxIterations = null)
+                    int? numberOfIterations = null)
         {
             Contracts.CheckValue(catalog, nameof(catalog));
             var env = CatalogUtils.GetEnvironment(catalog);
-            return new SdcaMultiClassTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Const, l1Threshold, maxIterations);
+            return new SdcaMultiClassTrainer(env, labelColumnName, featureColumnName, exampleWeightColumnName, loss, l2Regularization, l1Threshold, numberOfIterations);
         }
 
         /// <summary>
diff --git a/src/Microsoft.ML.StaticPipe/SdcaStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/SdcaStaticExtensions.cs
index 20b244a3a9..0d8de683e4 100644
--- a/src/Microsoft.ML.StaticPipe/SdcaStaticExtensions.cs
+++ b/src/Microsoft.ML.StaticPipe/SdcaStaticExtensions.cs
@@ -20,9 +20,9 @@ public static class SdcaStaticExtensions
         /// <param name="label">The label, or dependent variable.</param>
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 regularization hyperparameter.</param>
         /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <param name="loss">The custom loss, if unspecified will be <see cref="SquaredLoss"/>.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
@@ -38,25 +38,25 @@ public static class SdcaStaticExtensions
         /// </example>
         public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
             Scalar<float> label, Vector<float> features, Scalar<float> weights = null,
-            float? l2Const = null,
+            float? l2Regularization = null,
             float? l1Threshold = null,
-            int? maxIterations = null,
+            int? numberOfIterations = null,
             ISupportSdcaRegressionLoss loss = null,
             Action<LinearRegressionModelParameters> onFit = null)
         {
             Contracts.CheckValue(label, nameof(label));
             Contracts.CheckValue(features, nameof(features));
             Contracts.CheckValueOrNull(weights);
-            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
+            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
             Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
-            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
             Contracts.CheckValueOrNull(loss);
             Contracts.CheckValueOrNull(onFit);
 
             var rec = new TrainerEstimatorReconciler.Regression(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new SdcaRegressionTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations);
+                    var trainer = new SdcaRegressionTrainer(env, labelName, featuresName, weightsName, loss, l2Regularization, l1Threshold, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                     return trainer;
@@ -118,9 +118,9 @@ public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catal
         /// <param name="label">The label, or dependent variable.</param>
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 regularization hyperparameter.</param>
         /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -137,23 +137,23 @@ public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catal
         public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) Sdca(
             this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
             Scalar<bool> label, Vector<float> features, Scalar<float> weights = null,
-            float? l2Const = null,
+            float? l2Regularization = null,
             float? l1Threshold = null,
-            int? maxIterations = null,
+            int? numberOfIterations = null,
             Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
         {
             Contracts.CheckValue(label, nameof(label));
             Contracts.CheckValue(features, nameof(features));
             Contracts.CheckValueOrNull(weights);
-            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
+            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
             Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
-            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
             Contracts.CheckValueOrNull(onFit);
 
             var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new SdcaBinaryTrainer(env, labelName, featuresName, weightsName, l2Const, l1Threshold, maxIterations);
+                    var trainer = new SdcaBinaryTrainer(env, labelName, featuresName, weightsName, l2Regularization, l1Threshold, numberOfIterations);
                     if (onFit != null)
                     {
                         return trainer.WithOnFitDelegate(trans =>
@@ -230,9 +230,9 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="loss">The custom loss.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 regularization hyperparameter.</param>
         /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -245,24 +245,24 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) SdcaNonCalibrat
             Scalar<bool> label, Vector<float> features,
             ISupportSdcaClassificationLoss loss,
             Scalar<float> weights = null,
-            float? l2Const = null,
+            float? l2Regularization = null,
             float? l1Threshold = null,
-            int? maxIterations = null,
+            int? numberOfIterations = null,
             Action<LinearBinaryModelParameters> onFit = null)
         {
             Contracts.CheckValue(label, nameof(label));
             Contracts.CheckValue(features, nameof(features));
             Contracts.CheckValue(loss, nameof(loss));
             Contracts.CheckValueOrNull(weights);
-            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
+            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
             Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
-            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
             Contracts.CheckValueOrNull(onFit);
 
             var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new SdcaNonCalibratedBinaryTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations);
+                    var trainer = new SdcaNonCalibratedBinaryTrainer(env, labelName, featuresName, weightsName, loss, l2Regularization, l1Threshold, numberOfIterations);
                     if (onFit != null)
                     {
                         return trainer.WithOnFitDelegate(trans =>
@@ -335,9 +335,9 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) SdcaNonCalibrat
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="loss">The custom loss.</param>
         /// <param name="weights">The optional example weights.</param>
-        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
+        /// <param name="l2Regularization">The L2 regularization hyperparameter.</param>
         /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
-        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="numberOfIterations">The maximum number of passes to perform over the data.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -350,24 +350,24 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel) Sdca<TVal>(
             Vector<float> features,
             ISupportSdcaClassificationLoss loss = null,
             Scalar<float> weights = null,
-            float? l2Const = null,
+            float? l2Regularization = null,
             float? l1Threshold = null,
-            int? maxIterations = null,
+            int? numberOfIterations = null,
             Action<MulticlassLogisticRegressionModelParameters> onFit = null)
         {
             Contracts.CheckValue(label, nameof(label));
             Contracts.CheckValue(features, nameof(features));
             Contracts.CheckValueOrNull(loss);
             Contracts.CheckValueOrNull(weights);
-            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
+            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
             Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
-            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
             Contracts.CheckValueOrNull(onFit);
 
             var rec = new TrainerEstimatorReconciler.MulticlassClassifier<TVal>(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new SdcaMultiClassTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations);
+                    var trainer = new SdcaMultiClassTrainer(env, labelName, featuresName, weightsName, loss, l2Regularization, l1Threshold, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                     return trainer;
diff --git a/src/Microsoft.ML.StaticPipe/SgdStatic.cs b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
index 032211bac2..e2da102afe 100644
--- a/src/Microsoft.ML.StaticPipe/SgdStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/SgdStatic.cs
@@ -20,9 +20,9 @@ public static class SgdStaticExtensions
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
-        /// <param name="maxIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial learning rate used by SGD.</param>
-        /// <param name="l2Weight">The L2 regularization constant.</param>
+        /// <param name="numberOfIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
+        /// <param name="initialLearningRate">The initial learning rate used by SGD.</param>
+        /// <param name="l2Regularization">The L2 regularization constant.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
         /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -34,15 +34,15 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
             Scalar<bool> label,
             Vector<float> features,
             Scalar<float> weights = null,
-            int maxIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
-            double initLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
-            float l2Weight = SgdBinaryTrainer.Options.Defaults.L2Regularization,
+            int numberOfIterations = SgdBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initialLearningRate = SgdBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Regularization = SgdBinaryTrainer.Options.Defaults.L2Regularization,
             Action<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>> onFit = null)
         {
             var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new SgdBinaryTrainer(env, labelName, featuresName, weightsName, maxIterations, initLearningRate, l2Weight);
+                    var trainer = new SgdBinaryTrainer(env, labelName, featuresName, weightsName, numberOfIterations, initialLearningRate, l2Regularization);
 
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -100,9 +100,9 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <param name="label">The name of the label column.</param>
         /// <param name="features">The name of the feature column.</param>
         /// <param name="weights">The name for the example weight column.</param>
-        /// <param name="maxIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
-        /// <param name="initLearningRate">The initial learning rate used by SGD.</param>
-        /// <param name="l2Weight">The L2 regularization constant.</param>
+        /// <param name="numberOfIterations">The maximum number of iterations; set to 1 to simulate online learning.</param>
+        /// <param name="initialLearningRate">The initial learning rate used by SGD.</param>
+        /// <param name="l2Regularization">The L2 regularization constant.</param>
         /// <param name="loss">The loss function to use.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
@@ -115,9 +115,9 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) StochasticGradi
             Scalar<bool> label,
             Vector<float> features,
             Scalar<float> weights = null,
-            int maxIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
-            double initLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
-            float l2Weight = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization,
+            int numberOfIterations = SgdNonCalibratedBinaryTrainer.Options.Defaults.NumberOfIterations,
+            double initialLearningRate = SgdNonCalibratedBinaryTrainer.Options.Defaults.InitialLearningRate,
+            float l2Regularization = SgdNonCalibratedBinaryTrainer.Options.Defaults.L2Regularization,
             IClassificationLoss loss = null,
             Action<LinearBinaryModelParameters> onFit = null)
         {
@@ -125,7 +125,7 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) StochasticGradi
                 (env, labelName, featuresName, weightsName) =>
                 {
                     var trainer = new SgdNonCalibratedBinaryTrainer(env, labelName, featuresName, weightsName,
-                        maxIterations, initLearningRate, l2Weight, loss);
+                        numberOfIterations, initialLearningRate, l2Regularization, loss);
 
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index 26f5bb065d..6caab60446 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -15262,11 +15262,12 @@
           }
         },
         {
-          "Name": "CheckFrequency",
+          "Name": "ConvergenceCheckFrequency",
           "Type": "Int",
           "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
           "Aliases": [
-            "checkFreq"
+            "checkFreq",
+            "CheckFrequency"
           ],
           "Required": false,
           "SortOrder": 150.0,
@@ -15534,11 +15535,12 @@
           }
         },
         {
-          "Name": "CheckFrequency",
+          "Name": "ConvergenceCheckFrequency",
           "Type": "Int",
           "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
           "Aliases": [
-            "checkFreq"
+            "checkFreq",
+            "CheckFrequency"
           ],
           "Required": false,
           "SortOrder": 150.0,
@@ -15806,11 +15808,12 @@
           }
         },
         {
-          "Name": "CheckFrequency",
+          "Name": "ConvergenceCheckFrequency",
           "Type": "Int",
           "Desc": "Convergence check frequency (in terms of number of iterations). Set as negative or zero for not checking at all. If left blank, it defaults to check after every 'numThreads' iterations.",
           "Aliases": [
-            "checkFreq"
+            "checkFreq",
+            "CheckFrequency"
           ],
           "Required": false,
           "SortOrder": 150.0,
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index a94ca5c5fd..0046e4a469 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -2466,7 +2466,7 @@ public void TestInputBuilderComponentFactories()
             var options = new LegacySdcaBinaryTrainer.Options()
             {
                 NormalizeFeatures = NormalizeOption.Yes,
-                CheckFrequency = 42
+                ConvergenceCheckFrequency = 42
             };
 
             var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
@@ -2481,7 +2481,7 @@ public void TestInputBuilderComponentFactories()
 
             var expected =
                 @"{
-  ""CheckFrequency"": 42,
+  ""ConvergenceCheckFrequency"": 42,
   ""TrainingData"": ""$data"",
   ""NormalizeFeatures"": ""Yes""
 }";
@@ -2497,7 +2497,7 @@ public void TestInputBuilderComponentFactories()
   ""LossFunction"": {
     ""Name"": ""HingeLoss""
   },
-  ""CheckFrequency"": 42,
+  ""ConvergenceCheckFrequency"": 42,
   ""TrainingData"": ""$data"",
   ""NormalizeFeatures"": ""Yes""
 }";
@@ -2516,7 +2516,7 @@ public void TestInputBuilderComponentFactories()
       ""Margin"": 2.0
     }
   },
-  ""CheckFrequency"": 42,
+  ""ConvergenceCheckFrequency"": 42,
   ""TrainingData"": ""$data"",
   ""NormalizeFeatures"": ""Yes""
 }";
diff --git a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
index 6191cce2de..40e9606114 100644
--- a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
+++ b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs
@@ -145,7 +145,7 @@ void ExtensibilityModifyTextFeaturization()
                     }, "SentimentText")
                 .AppendCacheCheckpoint(mlContext)
                 .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaBinaryTrainer.Options { NumThreads = 1 }));
+                    new SdcaBinaryTrainer.Options { NumberOfThreads = 1 }));
 
             // Train the model.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 14c08f226b..b2e89c3d88 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -385,7 +385,7 @@ public void SdcaMulticlass()
                 .Append(r => (r.label, preds: catalog.Trainers.Sdca(
                     r.label,
                     r.features,
-                    maxIterations: 2,
+                    numberOfIterations: 2,
                     loss: loss, onFit: p => pred = p)));
 
             var pipe = reader.Append(est);
@@ -430,7 +430,7 @@ public void CrossValidate()
                 .Append(r => (r.label, preds: catalog.Trainers.Sdca(
                     r.label,
                     r.features,
-                    maxIterations: 2)));
+                    numberOfIterations: 2)));
 
             var results = catalog.CrossValidate(reader.Load(dataSource), est, r => r.label)
                 .Select(x => x.metrics).ToArray();
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs
index af69ce5681..e96ef170ad 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs
@@ -58,7 +58,7 @@ public void SdcaLogisticRegression()
 
             // Step 2: Create a binary classifier.
             // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
-            var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Label", featureColumnName: "Features", l2Const: 0.001f);
+            var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Label", featureColumnName: "Features", l2Regularization: 0.001f);
 
             // Step 3: Train the pipeline created.
             var model = pipeline.Fit(data);
@@ -103,7 +103,7 @@ public void SdcaSupportVectorMachine()
             // Step 2: Create a binary classifier.
             // We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
             var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
-                labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Const: 0.001f);
+                labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Regularization: 0.001f);
 
             // Step 3: Train the pipeline created.
             var model = pipeline.Fit(data);
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
index fd3f0b012c..cf0ca0aefb 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs
@@ -89,7 +89,7 @@ public void KMeansEstimator()
         [Fact]
         public void TestEstimatorHogwildSGD()
         {
-            var trainers = new[] { ML.BinaryClassification.Trainers.StochasticGradientDescent(l2Weight: 0, maxIterations: 80),
+            var trainers = new[] { ML.BinaryClassification.Trainers.StochasticGradientDescent(l2Regularization: 0, numberOfIterations: 80),
                 ML.BinaryClassification.Trainers.StochasticGradientDescent(new Trainers.SgdBinaryTrainer.Options(){ L2Regularization = 0, NumberOfIterations = 80})};
 
             foreach (var trainer in trainers)