From 95fcf1a1e5f79993de944ac15ad5ccbe60152f30 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Wed, 17 Apr 2019 22:31:47 -0700 Subject: [PATCH 1/4] Xml documentation for Calibrated and Non Calibrated SGD Trainer. --- .../Standard/SdcaBinary.cs | 63 +++++++++++++++++-- .../StandardTrainersCatalog.cs | 16 ++--- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index bbe982b6cb..4a9860a4c2 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -137,7 +137,7 @@ private protected float WScaledDot(in VBuffer features, Double scaling, i } private protected virtual int ComputeNumThreads(FloatLabelCursor.Factory cursorFactory) - => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); + => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); } public abstract class SdcaTrainerBase : StochasticTrainerBase @@ -2170,21 +2170,43 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig } /// - /// The for training logistic regression using a parallel stochastic gradient method. + /// The for training logistic regression using a parallel stochastic gradient method. /// The trained model is calibrated and can produce probability by feeding the output value of the /// linear function to a . /// /// + /// + /// /// + /// + /// + /// public sealed class SgdCalibratedTrainer : SgdBinaryTrainerBase> { + /// - /// Options for the . + /// Options for the as used in + /// [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). 
/// public sealed class Options : OptionsBase { @@ -2240,12 +2262,41 @@ private protected override CalibratedModelParametersBase - /// can train a linear classification model by minimizing any loss function - /// which implements . + /// The for training logistic regression using a parallel stochastic gradient method. /// + /// + /// + /// + /// + /// + /// + /// public sealed class SgdNonCalibratedTrainer : SgdBinaryTrainerBase { + /// + /// Options for the as used in + /// [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)). + /// public sealed class Options : OptionsBase { /// diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index 486a8dfd9f..c22dfa1e3b 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML public static class StandardTrainersCatalog { /// - /// Predict a target using a linear classification model trained with . + /// Creates a that predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. 
@@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Creates a that predicts a target using a linear classification model and advanced options. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. @@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with . + /// Creates a that predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Creates a that predicts a target using a linear classification model and advanced options. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. 
From 8d0e86e54ba8e59c25646dd43c0e0a095e7f9ced Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:17:00 -0700 Subject: [PATCH 2/4] PR feedback. --- .../Standard/SdcaBinary.cs | 28 +++++++++---------- .../StandardTrainersCatalog.cs | 12 ++++---- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index 4a9860a4c2..e509f11273 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2170,7 +2170,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig } /// - /// The for training logistic regression using a parallel stochastic gradient method. + /// The for training logistic regression using a parallel stochastic gradient method. /// The trained model is calibrated and can produce probability by feeding the output value of the /// linear function to a . /// @@ -2179,7 +2179,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single)) /// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). 
/// - /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] + /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)] /// /// ### Trainer Characteristics /// | | | @@ -2191,14 +2191,14 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// /// ### Training Algorithm Details /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification - /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal - /// rate of convergence. For more details about Hogwild SGD can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). + /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild Stochastic Gradient Descent for binary classification + /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal + /// rate of convergence. For more details about Hogwild Stochastic Gradient Descent can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). 
/// ]]> /// /// - /// - /// + /// + /// /// public sealed class SgdCalibratedTrainer : SgdBinaryTrainerBase> @@ -2269,7 +2269,7 @@ private protected override CalibratedModelParametersBase /// /// - /// - /// + /// + /// /// public sealed class SgdNonCalibratedTrainer : SgdBinaryTrainerBase diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index c22dfa1e3b..55c189b32a 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML public static class StandardTrainersCatalog { /// - /// Creates a that predicts a target using a linear classification model. + /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. The column data must be . - /// The features, or independent variables. The column data must be a known-sized vector of + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. @@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Creates a that predicts a target using a linear classification model and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. 
@@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Creates a that predicts a target using a linear classification model. + /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. The column data must be . - /// The features, or independent variables. The column data must be a known-sized vector of + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification } /// - /// Creates a that predicts a target using a linear classification model and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. From e58b622e815860c98b042bdedca91ebcaac5874a Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:22:24 -0700 Subject: [PATCH 3/4] PR feedback. 
--- docs/api-reference/algo-details-sgd.md | 9 +++++++++ .../Standard/SdcaBinary.cs | 12 ++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 docs/api-reference/algo-details-sgd.md diff --git a/docs/api-reference/algo-details-sgd.md b/docs/api-reference/algo-details-sgd.md new file mode 100644 index 0000000000..d008e590ae --- /dev/null +++ b/docs/api-reference/algo-details-sgd.md @@ -0,0 +1,9 @@ +### Training Algorithm Details +The Stochastic Gradient Descent (SGD) is one of the popular stochastic +optimization procedures that can be integrated into several machine learning +tasks to achieve state-of-the-art performance. This trainer implements the +Hogwild Stochastic Gradient Descent for binary classification that supports +multi-threading without any locking. If the associated optimization problem is +sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal rate of +convergence. More details about Hogwild Stochastic Gradient Descent can be +found [here](http://arxiv.org/pdf/1106.5730v2.pdf). \ No newline at end of file diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index e509f11273..f74e5d46cd 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2189,11 +2189,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// | Is caching required? | No | /// | Required NuGet in addition to Microsoft.ML | None | /// - /// ### Training Algorithm Details - /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild Stochastic Gradient Descent for binary classification - /// that supports multi-threading without any locking.
If the associated optimization problem is sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal - /// rate of convergence. For more details about Hogwild Stochastic Gradient Descent can be found [here](http://arxiv.org/pdf/1106.5730v2.pdf). + /// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)] /// ]]> /// /// @@ -2279,11 +2275,7 @@ private protected override CalibratedModelParametersBase /// /// From dec85016ae8acd6f073bffa716650d835e64d435 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Sun, 21 Apr 2019 12:52:25 -0700 Subject: [PATCH 4/4] PR feedback. --- src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index f74e5d46cd..d2260c2571 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -2179,7 +2179,7 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single)) /// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). /// - /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)] + /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] /// /// ### Trainer Characteristics /// | | | @@ -2265,7 +2265,7 @@ private protected override CalibratedModelParametersBase