diff --git a/docs/api-reference/algo-details-sgd.md b/docs/api-reference/algo-details-sgd.md new file mode 100644 index 0000000000..d008e590ae --- /dev/null +++ b/docs/api-reference/algo-details-sgd.md @@ -0,0 +1,9 @@ +### Training Algorithm Details +The Stochastic Gradient Descent (SGD) is one of the most popular stochastic +optimization procedures that can be integrated into several machine learning +tasks to achieve state-of-the-art performance. This trainer implements the +Hogwild Stochastic Gradient Descent for binary classification that supports +multi-threading without any locking. If the associated optimization problem is +sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal rate of +convergence. More details about Hogwild Stochastic Gradient Descent can be +found [here](http://arxiv.org/pdf/1106.5730v2.pdf). \ No newline at end of file diff --git a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs index bbe982b6cb..d2260c2571 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs @@ -137,7 +137,7 @@ private protected float WScaledDot(in VBuffer features, Double scaling, i } private protected virtual int ComputeNumThreads(FloatLabelCursor.Factory cursorFactory) - => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); + => Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2)); } public abstract class SdcaTrainerBase : StochasticTrainerBase @@ -2175,16 +2175,34 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig /// linear function to a . /// /// - /// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated - /// into several machine learning tasks to achieve state-of-the-art performance. 
This trainer implements the Hogwild SGD for binary classification - /// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal - /// rate of convergence. For more details about Hogwild SGD, please refer to http://arxiv.org/pdf/1106.5730v2.pdf. + /// + /// /// + /// + /// + /// public sealed class SgdCalibratedTrainer : SgdBinaryTrainerBase> { + /// - /// Options for the . + /// Options for the as used in + /// [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)). /// public sealed class Options : OptionsBase { @@ -2240,12 +2258,37 @@ private protected override CalibratedModelParametersBase - /// can train a linear classification model by minimizing any loss function - /// which implements . + /// The for training logistic regression using a parallel stochastic gradient method. /// + /// + /// + /// + /// + /// + /// + /// public sealed class SgdNonCalibratedTrainer : SgdBinaryTrainerBase { + /// + /// Options for the as used in + /// [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)). + /// public sealed class Options : OptionsBase { /// diff --git a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs index 486a8dfd9f..55c189b32a 100644 --- a/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs +++ b/src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs @@ -17,12 +17,12 @@ namespace Microsoft.ML public static class StandardTrainersCatalog { /// - /// Predict a target using a linear classification model trained with . 
+ /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. /// The initial learning rate used by SGD. @@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. @@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo } /// - /// Predict a target using a linear classification model trained with . + /// Create , which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object. - /// The name of the label column, or dependent variable. - /// The features, or independent variables. + /// The name of the label column, or dependent variable. The column data must be . + /// The features, or independent variables. The column data must be a known-sized vector of . /// The name of the example weight column (optional). 
/// The loss function minimized in the training process. Using, for example, leads to a support vector machine trainer. /// The maximum number of passes through the training dataset; set to 1 to simulate online learning. @@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification } /// - /// Predict a target using a linear classification model trained with and advanced options. + /// Create with advanced options, which predicts a target using a linear classification model. /// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function. /// /// The binary classification catalog trainer object.