From 95fcf1a1e5f79993de944ac15ad5ccbe60152f30 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 17 Apr 2019 22:31:47 -0700 Subject: [PATCH 1/4] Xml documentation for Calibrated and Non Calibrated SGD Trainer. --- .../Standard/SdcaBinary.cs | 63 +++++++++++++++++-- .../StandardTrainersCatalog.cs | 16 ++--- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index bbe982b6cb..4a9860a4c2 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -137,7 +137,7 @@ private protected float WScaledDot(in VBuffer features, Double scaling, i } private protected virtual int ComputeNumThreads(FloatLabelCursor.Factory cursorFactory) - => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); + => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); } public abstract class SdcaTrainerBase : StochasticTrainerBase @@ -2170,21 +2170,43 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig } /// - /// The for training logistic regression using a parallel stochastic gradient method. + /// The for training logistic regression using a parallel stochastic gradient method. /// The trained model is calibrated and can produce probability by feeding the output value of the /// linear function to a . /// /// + /// + /// /// + /// + /// + /// public sealed class SgdCalibratedTrainer : SgdBinaryTrainerBase> { + /// - /// Options for the . + /// Options for the as used in + /// [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). 
/// public sealed class Options : OptionsBase { @@ -2240,12 +2262,41 @@ private protected override CalibratedModelParametersBase - /// can train a linear classification model by minimizing any loss function - /// which implements . + /// The for training logistic regression using a parallel stochastic gradient method. /// + /// + /// + /// + /// + /// + /// + /// public sealed class SgdNonCalibratedTrainer : SgdBinaryTrainerBase { + /// + /// Options for the as used in + /// [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)). + /// public sealed class Options : OptionsBase { /// diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index 486a8dfd9f..c22dfa1e3b 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML public static class StandardTrainersCatalog { /// - /// Predict a target using a linear classification model trained with . + /// Creates a that predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. 
@@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Creates a that predicts a target using a linear classification model and advanced options. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. @@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with . + /// Creates a that predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Creates a that predicts a target using a linear classification model and advanced options. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. 
From 8d0e86e54ba8e59c25646dd43c0e0a095e7f9ced Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:17:00 -0700 Subject: [PATCH 2/4] PR feedback. --- .../Standard/SdcaBinary.cs | 28 +++++++++---------- .../StandardTrainersCatalog.cs | 12 ++++---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index 4a9860a4c2..e509f11273 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2170,7 +2170,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig } /// - /// The for training logistic regression using a parallel stochastic gradient method. + /// The for training logistic regression using a parallel stochastic gradient method. /// The trained model is calibrated and can produce probability by feeding the output value of the /// linear function to a . /// @@ -2179,7 +2179,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single)) /// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). 
/// - /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] + /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)] /// /// ### Trainer Characteristics /// | | | @@ -2191,14 +2191,14 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// /// ### Training Algorithm Details /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification - /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal - /// rate of convergence. For more details about Hogwild SGD can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). + /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild Stochastic Gradient Descent for binary classification + /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal + /// rate of convergence. For more details about Hogwild Stochastic Gradient Descent can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). 
/// ]]> /// /// - /// - /// + /// + /// /// public sealed class SgdCalibratedTrainer : SgdBinaryTrainerBase> @@ -2269,7 +2269,7 @@ private protected override CalibratedModelParametersBase /// /// - /// - /// + /// + /// /// public sealed class SgdNonCalibratedTrainer : SgdBinaryTrainerBase diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index c22dfa1e3b..55c189b32a 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML public static class StandardTrainersCatalog { /// - /// Creates a that predicts a target using a linear classification model. + /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. The column data must be . - /// The features, or independent variables. The column data must be a known-sized vector of + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. @@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Creates a that predicts a target using a linear classification model and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. 
@@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Creates a that predicts a target using a linear classification model. + /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. The column data must be . - /// The features, or independent variables. The column data must be a known-sized vector of + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification } /// - /// Creates a that predicts a target using a linear classification model and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. From e58b622e815860c98b042bdedca91ebcaac5874a Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:22:24 -0700 Subject: [PATCH 3/4] PR feedback. 
--- docs/api-reference/algo-details-sgd.md | 9 +++++++++ .../Standard/SdcaBinary.cs | 12 ++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 docs/api-reference/algo-details-sgd.md diff --git a/docs/api-reference/algo-details-sgd.md b/docs/api-reference/algo-details-sgd.md new file mode 100644 index 0000000000..d008e590ae --- /dev/null +++ b/docs/api-reference/algo-details-sgd.md @@ -0,0 +1,9 @@ +### Training Algorithm Details +The Stochastic Gradient Descent (SGD) is one of the popular stochastic +optimization procedures that can be integrated into several machine learning +tasks to achieve state-of-the-art performance. This trainer implements the +Hogwild Stochastic Gradient Descent for binary classification that supports +multi-threading without any locking. If the associated optimization problem is +sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal rate of +convergence. More details about Hogwild Stochastic Gradient Descent can be +found [here](http://arxiv.org/pdf/1106.5730v2.pdf). \ No newline at end of file diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index e509f11273..f74e5d46cd 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2189,11 +2189,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// | Is caching required? | No | /// | Required NuGet in addition to Microsoft.ML | None | /// - /// ### Training Algorithm Details - /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild Stochastic Gradient Descent for binary classification - /// that supports multi-threading without any locking.
If the associated optimization problem is sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal - /// rate of convergence. For more details about Hogwild Stochastic Gradient Descent can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). + /// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)] /// ]]> /// /// @@ -2279,11 +2275,7 @@ private protected override CalibratedModelParametersBase /// /// From dec85016ae8acd6f073bffa716650d835e64d435 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:52:25 -0700 Subject: [PATCH 4/4] PR feedback. --- src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index f74e5d46cd..d2260c2571 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2179,7 +2179,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single)) /// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). /// - /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)] + /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] /// /// ### Trainer Characteristics /// | | | @@ -2265,7 +2265,7 @@ private protected override CalibratedModelParametersBase