Xml documentation for Calibrated and Non Calibrated SGD Trainer. #3392

Merged
merged 4 commits into from
Apr 21, 2019
Changes from 3 commits
9 changes: 9 additions & 0 deletions docs/api-reference/algo-details-sgd.md
@@ -0,0 +1,9 @@
### Training Algorithm Details
Stochastic Gradient Descent (SGD) is one of the popular stochastic
optimization procedures that can be integrated into several machine learning
tasks to achieve state-of-the-art performance. This trainer implements
Hogwild Stochastic Gradient Descent for binary classification, which supports
multi-threading without any locking. If the associated optimization problem is
sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal rate of
convergence. More details about Hogwild Stochastic Gradient Descent can be
found [here](http://arxiv.org/pdf/1106.5730v2.pdf).
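
For context, the trainer documented above is created through the `BinaryClassification` catalog. A minimal usage sketch follows; it assumes the Microsoft.ML NuGet package, and the schema class, column names, and hyperparameter values are illustrative, not taken from this PR:

```csharp
using Microsoft.ML;
using Microsoft.ML.Data;

// Illustrative input schema: a boolean label plus a known-sized float vector.
public class InputRow
{
    public bool Label { get; set; }

    [VectorType(10)]
    public float[] Features { get; set; }
}

public static class SgdExample
{
    public static void Train(MLContext mlContext, IDataView trainingData)
    {
        // Hogwild SGD trainer whose raw score is mapped to a probability
        // with a Platt calibrator.
        var pipeline = mlContext.BinaryClassification.Trainers.SgdCalibrated(
            labelColumnName: "Label",
            featureColumnName: "Features",
            numberOfIterations: 20,
            learningRate: 0.01);

        var model = pipeline.Fit(trainingData);
    }
}
```

The simple overload shown here corresponds to the `SgdCalibrated(string, string, string, int, double, float)` catalog method referenced in the diff below.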
59 changes: 51 additions & 8 deletions src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs
Expand Up @@ -137,7 +137,7 @@ private protected float WScaledDot(in VBuffer<float> features, Double scaling, i
}

private protected virtual int ComputeNumThreads(FloatLabelCursor.Factory cursorFactory)
=> Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2));
=> Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2));
}

public abstract class SdcaTrainerBase<TOptions, TTransformer, TModel> : StochasticTrainerBase<TTransformer, TModel>
Expand Down Expand Up @@ -2175,16 +2175,34 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig
/// linear function to a <see cref="PlattCalibrator"/>.
/// </summary>
/// <remarks>
/// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated
/// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification
/// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal
/// rate of convergence. For more details about Hogwild SGD, please refer to http://arxiv.org/pdf/1106.5730v2.pdf.
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single))
/// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Binary classification |
/// | Is normalization required? | Yes |
/// | Is caching required? | No |
/// | Required NuGet in addition to Microsoft.ML | None |
///
/// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)]
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="StandardTrainersCatalog.SgdCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, int, double, float)"/>
/// <seealso cref="StandardTrainersCatalog.SgdCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, SgdCalibratedTrainer.Options)"/>
/// <seealso cref="Options"/>
public sealed class SgdCalibratedTrainer :
SgdBinaryTrainerBase<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>
{

/// <summary>
/// Options for the <see cref="SgdCalibratedTrainer"/>.
/// Options for the <see cref="SgdCalibratedTrainer"/> as used in
/// [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)).
/// </summary>
public sealed class Options : OptionsBase
{
Expand Down Expand Up @@ -2240,12 +2258,37 @@ private protected override CalibratedModelParametersBase<LinearBinaryModelParame
}

/// <summary>
/// <see cref="SgdNonCalibratedTrainer"/> can train a linear classification model by minimizing any loss function
/// which implements <see cref="IClassificationLoss"/>.
/// The <see cref="IEstimator{TTransformer}"/> for training a linear binary classification model using a parallel
/// stochastic gradient method; it can minimize any loss function that implements <see cref="IClassificationLoss"/>.
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [SgdNonCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,Microsoft.ML.Trainers.IClassificationLoss,System.Int32,System.Double,System.Single))
/// or [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)).
///
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Binary classification |
/// | Is normalization required? | Yes |
/// | Is caching required? | No |
/// | Required NuGet in addition to Microsoft.ML | None |
///
/// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)]
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="StandardTrainersCatalog.SgdNonCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, int, double, float)"/>
/// <seealso cref="StandardTrainersCatalog.SgdNonCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, SgdNonCalibratedTrainer.Options)"/>
/// <seealso cref="Options"/>
public sealed class SgdNonCalibratedTrainer :
SgdBinaryTrainerBase<LinearBinaryModelParameters>
{
/// <summary>
/// Options for the <see cref="SgdNonCalibratedTrainer"/> as used in
/// [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)).
/// </summary>
public sealed class Options : OptionsBase
{
/// <summary>
Expand Down
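
The remarks above also point at the `Options`-based overloads. A sketch of that advanced-options form, again assuming the Microsoft.ML NuGet package, with illustrative property values (the property names come from the trainer's `OptionsBase`; treat any not shown in this diff as assumptions):

```csharp
using Microsoft.ML;
using Microsoft.ML.Trainers;

var mlContext = new MLContext(seed: 0);

// Advanced-options creation path for the calibrated Hogwild SGD trainer.
var options = new SgdCalibratedTrainer.Options
{
    LabelColumnName = "Label",
    FeatureColumnName = "Features",
    NumberOfIterations = 30,
    LearningRate = 0.01,
    L2Regularization = 1e-6f
};

var trainer = mlContext.BinaryClassification.Trainers.SgdCalibrated(options);
```

The non-calibrated trainer has a parallel `SgdNonCalibratedTrainer.Options` type used the same way with `SgdNonCalibrated(options)`.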
16 changes: 8 additions & 8 deletions src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
Expand Up @@ -17,12 +17,12 @@ namespace Microsoft.ML
public static class StandardTrainersCatalog
{
/// <summary>
/// Predict a target using a linear classification model trained with <see cref="SgdCalibratedTrainer"/>.
/// Create <see cref="SgdCalibratedTrainer"/>, which predicts a target using a linear classification model.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
/// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
/// <param name="featureColumnName">The features, or independent variables.</param>
/// <param name="labelColumnName">The name of the label column, or dependent variable. The column data must be <see cref="System.Boolean"/>.</param>
/// <param name="featureColumnName">The features, or independent variables. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
/// <param name="learningRate">The initial learning rate used by SGD.</param>
Expand All @@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo
}

/// <summary>
/// Predict a target using a linear classification model trained with <see cref="SgdCalibratedTrainer"/> and advanced options.
/// Create <see cref="Trainers.SgdCalibratedTrainer"/> with advanced options, which predicts a target using a linear classification model.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
Expand All @@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo
}

/// <summary>
/// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedTrainer"/>.
/// Create <see cref="Trainers.SgdNonCalibratedTrainer"/>, which predicts a target using a linear classification model.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
/// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
/// <param name="featureColumnName">The features, or independent variables.</param>
/// <param name="labelColumnName">The name of the label column, or dependent variable. The column data must be <see cref="System.Boolean"/>.</param>
/// <param name="featureColumnName">The features, or independent variables. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="lossFunction">The <a href="https://en.wikipedia.org/wiki/Loss_function">loss</a> function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer.</param>
/// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
Expand Down Expand Up @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification
}

/// <summary>
/// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedTrainer"/> and advanced options.
/// Create <see cref="Trainers.SgdNonCalibratedTrainer"/> with advanced options, which predicts a target using a linear classification model.
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
/// </summary>
/// <param name="catalog">The binary classification catalog trainer object.</param>
Expand Down
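
The `lossFunction` parameter documented above notes that passing `HingeLoss` yields a support vector machine trainer. A hedged sketch of that usage (Microsoft.ML NuGet assumed; values are illustrative):

```csharp
using Microsoft.ML;
using Microsoft.ML.Trainers;

var mlContext = new MLContext(seed: 0);

// Non-calibrated SGD minimizing the hinge loss, i.e. a linear SVM-style
// trainer; raw scores are produced without probability calibration.
var trainer = mlContext.BinaryClassification.Trainers.SgdNonCalibrated(
    lossFunction: new HingeLoss(),
    numberOfIterations: 20,
    learningRate: 0.01);
```

Omitting `lossFunction` falls back to the trainer's default classification loss, so the hinge-loss argument is the only change needed to move from logistic-style training to an SVM-style objective.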