From f9e39c9ca6dc52d090f93db2bac36b88a002cd55 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Thu, 18 Apr 2019 12:02:54 -0700
Subject: [PATCH 1/3] XML documentation for SDCA regression trainer.

---
 .../Standard/SdcaRegression.cs                | 33 ++++++++++++++++++-
 .../StandardTrainersCatalog.cs                | 16 ++++-----
 2 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
index 1f27c19bcc..2ada47d25e 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
@@ -24,7 +24,38 @@ namespace Microsoft.ML.Trainers
     /// <summary>
     /// The <see cref="IEstimator{TTransformer}"/> for training a regression model using the stochastic dual coordinate ascent method.
     /// </summary>
-    /// <include file='doc.xml' path='doc/members/member[@name="SDCA_remarks"]/*' />
+    /// <remarks>
+    /// <format type="text/markdown"><![CDATA[
+    /// To create this trainer, use [Sdca](xref:Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,System.String,System.String,System.String,Microsoft.ML.Trainers.ISupportSdcaRegressionLoss,System.Nullable{System.Single},System.Nullable{System.Single},System.Nullable{System.Int32}))
+    /// or [Sdca(Options)](xref:Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,Microsoft.ML.Trainers.SdcaRegressionTrainer.Options)).
+    ///
+    /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)]
+    ///
+    /// ### Trainer Characteristics
+    /// |  |  |
+    /// | -- | -- |
+    /// | Machine learning task | Regression |
+    /// | Is normalization required? | Yes |
+    /// | Is caching required? | No |
+    /// | Required NuGet in addition to Microsoft.ML | None |
+    ///
+    /// ### Training Algorithm Details
+    /// This trainer is based on the Stochastic Dual Coordinate Ascent (SDCA) method, a state-of-the-art optimization technique for convex objective functions.
+    /// The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation that supports multi-threading.
+    /// Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
+    /// Several choices of loss functions are also provided.The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
+    /// Note that SDCA is a stochastic and streaming optimization algorithm. The results depends on the order of the training data.
+    /// For reproducible results, it is recommended that one sets 'Shuffle' to False and 'NumThreads' to 1.
+    /// Elastic net regularization can be specified by the 'L2Const' and 'L1Threshold' parameters. Note that the 'L2Const' has an effect on the rate of convergence.
+    /// In general, the larger the 'L2Const', the faster SDCA converges.
+    /// For more information, see: [Scaling Up Stochastic Dual Coordinate Ascent](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/main-3.pdf ) and
+    /// [Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization](http://www.jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf).
+    /// ]]>
+    /// </format>
+    /// </remarks>
+    /// <seealso cref="Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,System.String,System.String,System.String,Microsoft.ML.Trainers.ISupportSdcaRegressionLoss,System.Nullable{System.Single},System.Nullable{System.Single},System.Nullable{System.Int32})"/>
+    /// <seealso cref="Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,Microsoft.ML.Trainers.SdcaRegressionTrainer.Options)"/>
+    /// <seealso cref="Options"/>
     public sealed class SdcaRegressionTrainer : SdcaTrainerBase<SdcaRegressionTrainer.Options, RegressionPredictionTransformer<LinearRegressionModelParameters>, LinearRegressionModelParameters>
     {
         internal const string LoadNameValue = "SDCAR";
diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
index 9f6aca77dc..67018c48a1 100644
--- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
+++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
@@ -129,11 +129,11 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification
         }
 
         /// <summary>
-        /// Predict a target using a linear regression model trained with <see cref="SdcaRegressionTrainer"/>.
+        /// Creates a <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear regression model.
         /// </summary>
         /// <param name="catalog">The regression catalog trainer object.</param>
-        /// <param name="labelColumnName">The name of the label column.</param>
-        /// <param name="featureColumnName">The name of the feature column.</param>
+        /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/></param>
+        /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="lossFunction">The <a href="https://en.wikipedia.org/wiki/Loss_function">loss</a> function minimized in the training process. Using, for example, its default <see cref="SquaredLoss"/> leads to a least square trainer.</param>
         /// <param name="l2Regularization">The L2 weight for <a href='https://en.wikipedia.org/wiki/Regularization_(mathematics)'>regularization</a>.</param>
@@ -160,7 +160,7 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Predict a target using a linear regression model trained with <see cref="SdcaRegressionTrainer"/> and advanced options.
+        /// Creates a <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear regression model trained with advanced options.
         /// </summary>
         /// <param name="catalog">The regression catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
@@ -181,11 +181,11 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Predict a target using a linear classification model trained with <see cref="SdcaLogisticRegressionBinaryTrainer"/>.
+        /// Creates a <see cref="SdcaLogisticRegressionBinaryTrainer"/>, that predicts a target using a linear classification model.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
-        /// <param name="labelColumnName">The name of the label column.</param>
-        /// <param name="featureColumnName">The name of the feature column.</param>
+        /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/>.</param>
+        /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="l2Regularization">The L2 weight for <a href='https://en.wikipedia.org/wiki/Regularization_(mathematics)'>regularization</a>.</param>
         /// <param name="l1Regularization">The L1 <a href='https://en.wikipedia.org/wiki/Regularization_(mathematics)'>regularization</a> hyperparameter. Higher values will tend to lead to more sparse model.</param>
@@ -211,7 +211,7 @@ public static SdcaLogisticRegressionBinaryTrainer SdcaLogisticRegression(
         }
 
         /// <summary>
-        /// Predict a target using a linear classification model trained with <see cref="SdcaLogisticRegressionBinaryTrainer"/> and advanced options.
+        /// Creates a <see cref="SdcaLogisticRegressionBinaryTrainer"/>, which predicts a target using a linear classification model with advanced options.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>

From fe88eedc70fd9ded0b77343e603ea4443c9ad622 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Sun, 21 Apr 2019 10:52:12 -0700
Subject: [PATCH 2/3] PR feedback.

---
 docs/api-reference/algo-details-sdca.md       | 25 +++++++++++++++++++
 .../Standard/SdcaRegression.cs                | 16 +++---------
 .../StandardTrainersCatalog.cs                |  8 +++---
 3 files changed, 32 insertions(+), 17 deletions(-)
 create mode 100644 docs/api-reference/algo-details-sdca.md

diff --git a/docs/api-reference/algo-details-sdca.md b/docs/api-reference/algo-details-sdca.md
new file mode 100644
index 0000000000..df935b07ba
--- /dev/null
+++ b/docs/api-reference/algo-details-sdca.md
@@ -0,0 +1,25 @@
+### Training Algorithm Details
+This trainer is based on the Stochastic Dual Coordinate Ascent (SDCA) method, a
+state-of-the-art optimization technique for convex objective functions. The
+algorithm can be scaled for use on large out-of-memory data sets due to a
+semi-asynchronized implementation that supports multi-threading.
+
+Convergence is underwritten by periodically enforcing synchronization between
+primal and dual variables in a separate thread. Several choices of loss
+functions are also provided.
+
+Note that SDCA is a stochastic and streaming optimization algorithm. The result
+depends on the order of the training data because the stopping tolerance is not
+tight enough. In strongly-convex optimization, the optimal solution is unique,
+so every run eventually converges to the same solution. Even in
+non-strongly-convex cases, you will get equally good solutions from run to run.
+For reproducible results, it is recommended that one sets 'Shuffle' to False and
+'NumThreads' to 1. Elastic net regularization can be specified by the 'L2Const'
+and 'L1Threshold' parameters. Note that the 'L2Const' has an effect on the rate
+of convergence. In general, the larger the 'L2Const', the faster SDCA converges.
+
+For more information, see:
+* [Scaling Up Stochastic Dual Coordinate
+  Ascent.](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/main-3.pdf)
+* [Stochastic Dual Coordinate Ascent Methods for Regularized Loss
+  Minimization.](http://www.jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf)
\ No newline at end of file
diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
index 2ada47d25e..5398cf4e0b 100644
--- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
+++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
@@ -39,22 +39,12 @@ namespace Microsoft.ML.Trainers
     /// | Is caching required? | No |
     /// | Required NuGet in addition to Microsoft.ML | None |
     ///
-    /// ### Training Algorithm Details
-    /// This trainer is based on the Stochastic Dual Coordinate Ascent (SDCA) method, a state-of-the-art optimization technique for convex objective functions.
-    /// The algorithm can be scaled for use on large out-of-memory data sets due to a semi-asynchronized implementation that supports multi-threading.
-    /// Convergence is underwritten by periodically enforcing synchronization between primal and dual updates in a separate thread.
-    /// Several choices of loss functions are also provided.The SDCA method combines several of the best properties and capabilities of logistic regression and SVM algorithms.
-    /// Note that SDCA is a stochastic and streaming optimization algorithm. The results depends on the order of the training data.
-    /// For reproducible results, it is recommended that one sets 'Shuffle' to False and 'NumThreads' to 1.
-    /// Elastic net regularization can be specified by the 'L2Const' and 'L1Threshold' parameters. Note that the 'L2Const' has an effect on the rate of convergence.
-    /// In general, the larger the 'L2Const', the faster SDCA converges.
-    /// For more information, see: [Scaling Up Stochastic Dual Coordinate Ascent](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/main-3.pdf ) and
-    /// [Stochastic Dual Coordinate Ascent Methods for Regularized Loss Minimization](http://www.jmlr.org/papers/volume14/shalev-shwartz13a/shalev-shwartz13a.pdf).
+    /// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-sdca.md)]
     /// ]]>
     /// </format>
     /// </remarks>
-    /// <seealso cref="Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,System.String,System.String,System.String,Microsoft.ML.Trainers.ISupportSdcaRegressionLoss,System.Nullable{System.Single},System.Nullable{System.Single},System.Nullable{System.Int32})"/>
-    /// <seealso cref="Microsoft.ML.StandardTrainersCatalog.Sdca(Microsoft.ML.RegressionCatalog.RegressionTrainers,Microsoft.ML.Trainers.SdcaRegressionTrainer.Options)"/>
+    /// <seealso cref="StandardTrainersCatalog.Sdca(RegressionCatalog.RegressionTrainers, string, string, string, ISupportSdcaRegressionLoss, float?, float?, int?)"/>
+    /// <seealso cref="StandardTrainersCatalog.Sdca(RegressionCatalog.RegressionTrainers, SdcaRegressionTrainer.Options)"/>
     /// <seealso cref="Options"/>
     public sealed class SdcaRegressionTrainer : SdcaTrainerBase<SdcaRegressionTrainer.Options, RegressionPredictionTransformer<LinearRegressionModelParameters>, LinearRegressionModelParameters>
     {
diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
index 67018c48a1..36cfaf0760 100644
--- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
+++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
@@ -129,7 +129,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification
         }
 
         /// <summary>
-        /// Creates a <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear regression model.
+        /// Create <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear regression model.
         /// </summary>
         /// <param name="catalog">The regression catalog trainer object.</param>
         /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/></param>
@@ -160,7 +160,7 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Creates a <see cref="SdcaRegressionTrainer"/>, which predicts a target using a linear regression model trained with advanced options.
+        /// Creates <see cref="SdcaRegressionTrainer"/> with advanced options, which predicts a target using a linear regression model trained.
         /// </summary>
         /// <param name="catalog">The regression catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
@@ -181,7 +181,7 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Creates a <see cref="SdcaLogisticRegressionBinaryTrainer"/>, that predicts a target using a linear classification model.
+        /// Create <see cref="SdcaLogisticRegressionBinaryTrainer"/>, that predicts a target using a linear classification model.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/>.</param>
@@ -211,7 +211,7 @@ public static SdcaLogisticRegressionBinaryTrainer SdcaLogisticRegression(
         }
 
         /// <summary>
-        /// Creates a <see cref="SdcaLogisticRegressionBinaryTrainer"/>, which predicts a target using a linear classification model with advanced options.
+        /// Create <see cref="SdcaLogisticRegressionBinaryTrainer"/> using advanced options, which predicts a target using a linear classification model.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>

From 01d28a4f8d4726d7feb28a884cdb77bf428c3917 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Sun, 21 Apr 2019 11:02:28 -0700
Subject: [PATCH 3/3] PR feedback.

---
 src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
index 36cfaf0760..486a8dfd9f 100644
--- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
+++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs
@@ -160,7 +160,7 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Creates <see cref="SdcaRegressionTrainer"/> with advanced options, which predicts a target using a linear regression model trained.
+        /// Create <see cref="SdcaRegressionTrainer"/> with advanced options, which predicts a target using a linear regression model.
         /// </summary>
         /// <param name="catalog">The regression catalog trainer object.</param>
         /// <param name="options">Trainer options.</param>
@@ -181,7 +181,7 @@ public static SdcaRegressionTrainer Sdca(this RegressionCatalog.RegressionTraine
         }
 
         /// <summary>
-        /// Create <see cref="SdcaLogisticRegressionBinaryTrainer"/>, that predicts a target using a linear classification model.
+        /// Create <see cref="SdcaLogisticRegressionBinaryTrainer"/>, which predicts a target using a linear classification model.
         /// </summary>
         /// <param name="catalog">The binary classification catalog trainer object.</param>
         /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/>.</param>