Skip to content

Commit 2153977

Browse files
authored
Xml documentation for Calibrated and Non Calibrated SGD Trainer. (#3392)
* Xml documentation for Calibrated and Non Calibrated SGD Trainer. * PR feedback. * PR feedback. * PR feedback.
1 parent 4deb4fc commit 2153977

File tree

3 files changed

+68
-16
lines changed

3 files changed

+68
-16
lines changed
+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
### Training Algorithm Details
2+
Stochastic Gradient Descent (SGD) is one of the most popular stochastic
3+
optimization procedures that can be integrated into several machine learning
4+
tasks to achieve state-of-the-art performance. This trainer implements the
5+
Hogwild Stochastic Gradient Descent for binary classification that supports
6+
multi-threading without any locking. If the associated optimization problem is
7+
sparse, Hogwild Stochastic Gradient Descent achieves a nearly optimal rate of
8+
convergence. More details about Hogwild Stochastic Gradient Descent can be
9+
found [here](http://arxiv.org/pdf/1106.5730v2.pdf).

src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs

+51-8
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ private protected float WScaledDot(in VBuffer<float> features, Double scaling, i
137137
}
138138

139139
private protected virtual int ComputeNumThreads(FloatLabelCursor.Factory cursorFactory)
140-
=> Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2));
140+
=> Math.Min(8, Math.Max(1, Environment.ProcessorCount / 2));
141141
}
142142

143143
public abstract class SdcaTrainerBase<TOptions, TTransformer, TModel> : StochasticTrainerBase<TTransformer, TModel>
@@ -2175,16 +2175,34 @@ private protected override void CheckLabel(RoleMappedData examples, out int weig
21752175
/// linear function to a <see cref="PlattCalibrator"/>.
21762176
/// </summary>
21772177
/// <remarks>
2178-
/// The Stochastic Gradient Descent (SGD) is one of the popular stochastic optimization procedures that can be integrated
2179-
/// into several machine learning tasks to achieve state-of-the-art performance. This trainer implements the Hogwild SGD for binary classification
2180-
/// that supports multi-threading without any locking. If the associated optimization problem is sparse, Hogwild SGD achieves a nearly optimal
2181-
/// rate of convergence. For more details about Hogwild SGD, please refer to http://arxiv.org/pdf/1106.5730v2.pdf.
2178+
/// <format type="text/markdown"><![CDATA[
2179+
/// To create this trainer, use [SgdCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Double,System.Single))
2180+
/// or [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)).
2181+
///
2182+
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
2183+
///
2184+
/// ### Trainer Characteristics
2185+
/// | | |
2186+
/// | -- | -- |
2187+
/// | Machine learning task | Binary classification |
2188+
/// | Is normalization required? | Yes |
2189+
/// | Is caching required? | No |
2190+
/// | Required NuGet in addition to Microsoft.ML | None |
2191+
///
2192+
/// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)]
2193+
/// ]]>
2194+
/// </format>
21822195
/// </remarks>
2196+
/// <seealso cref="StandardTrainersCatalog.SgdCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, int, double, float)"/>
2197+
/// <seealso cref="StandardTrainersCatalog.SgdCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, SgdCalibratedTrainer.Options)"/>
2198+
/// <seealso cref="Options"/>
21832199
public sealed class SgdCalibratedTrainer :
21842200
SgdBinaryTrainerBase<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>
21852201
{
2202+
21862203
/// <summary>
2187-
/// Options for the <see cref="SgdCalibratedTrainer"/>.
2204+
/// Options for the <see cref="SgdCalibratedTrainer"/> as used in
2205+
/// [SgdCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdCalibratedTrainer.Options)).
21882206
/// </summary>
21892207
public sealed class Options : OptionsBase
21902208
{
@@ -2240,12 +2258,37 @@ private protected override CalibratedModelParametersBase<LinearBinaryModelParame
22402258
}
22412259

22422260
/// <summary>
2243-
/// <see cref="SgdNonCalibratedTrainer"/> can train a linear classification model by minimizing any loss function
2244-
/// which implements <see cref="IClassificationLoss"/>.
2261+
/// The <see cref="IEstimator{TTransformer}"/> for training logistic regression using a parallel stochastic gradient method.
22452262
/// </summary>
2263+
/// <remarks>
2264+
/// <format type="text/markdown"><![CDATA[
2265+
/// To create this trainer, use [SgdNonCalibrated](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,Microsoft.ML.Trainers.IClassificationLoss,System.Int32,System.Double,System.Single))
2266+
/// or [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)).
2267+
///
2268+
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
2269+
///
2270+
/// ### Trainer Characteristics
2271+
/// | | |
2272+
/// | -- | -- |
2273+
/// | Machine learning task | Binary classification |
2274+
/// | Is normalization required? | Yes |
2275+
/// | Is caching required? | No |
2276+
/// | Required NuGet in addition to Microsoft.ML | None |
2277+
///
2278+
/// [!include[io](~/../docs/samples/docs/api-reference/algo-details-sgd.md)]
2279+
/// ]]>
2280+
/// </format>
2281+
/// </remarks>
2282+
/// <seealso cref="StandardTrainersCatalog.SgdNonCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, IClassificationLoss, int, double, float)"/>
2283+
/// <seealso cref="StandardTrainersCatalog.SgdNonCalibrated(BinaryClassificationCatalog.BinaryClassificationTrainers, SgdNonCalibratedTrainer.Options)"/>
2284+
/// <seealso cref="Options"/>
22462285
public sealed class SgdNonCalibratedTrainer :
22472286
SgdBinaryTrainerBase<LinearBinaryModelParameters>
22482287
{
2288+
/// <summary>
2289+
/// Options for the <see cref="SgdNonCalibratedTrainer"/> as used in
2290+
/// [SgdNonCalibrated(Options)](xref:Microsoft.ML.StandardTrainersCatalog.SgdNonCalibrated(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.SgdNonCalibratedTrainer.Options)).
2291+
/// </summary>
22492292
public sealed class Options : OptionsBase
22502293
{
22512294
/// <summary>

src/Microsoft.ML.StandardTrainers/StandardTrainersCatalog.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ namespace Microsoft.ML
1717
public static class StandardTrainersCatalog
1818
{
1919
/// <summary>
20-
/// Predict a target using a linear classification model trained with <see cref="SgdCalibratedTrainer"/>.
20+
/// Create <see cref="SgdCalibratedTrainer"/>, which predicts a target using a linear classification model.
2121
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
2222
/// </summary>
2323
/// <param name="catalog">The binary classification catalog trainer object.</param>
24-
/// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
25-
/// <param name="featureColumnName">The features, or independent variables.</param>
24+
/// <param name="labelColumnName">The name of the label column, or dependent variable. The column data must be <see cref="System.Boolean"/>.</param>
25+
/// <param name="featureColumnName">The features, or independent variables. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
2626
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
2727
/// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
2828
/// <param name="learningRate">The initial learning rate used by SGD.</param>
@@ -49,7 +49,7 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo
4949
}
5050

5151
/// <summary>
52-
/// Predict a target using a linear classification model trained with <see cref="SgdCalibratedTrainer"/> and advanced options.
52+
/// Create <see cref="Trainers.SgdCalibratedTrainer"/> with advanced options, which predicts a target using a linear classification model.
5353
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
5454
/// </summary>
5555
/// <param name="catalog">The binary classification catalog trainer object.</param>
@@ -72,12 +72,12 @@ public static SgdCalibratedTrainer SgdCalibrated(this BinaryClassificationCatalo
7272
}
7373

7474
/// <summary>
75-
/// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedTrainer"/>.
75+
/// Create <see cref="Trainers.SgdNonCalibratedTrainer"/>, which predicts a target using a linear classification model.
7676
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
7777
/// </summary>
7878
/// <param name="catalog">The binary classification catalog trainer object.</param>
79-
/// <param name="labelColumnName">The name of the label column, or dependent variable.</param>
80-
/// <param name="featureColumnName">The features, or independent variables.</param>
79+
/// <param name="labelColumnName">The name of the label column, or dependent variable. The column data must be <see cref="System.Boolean"/>.</param>
80+
/// <param name="featureColumnName">The features, or independent variables. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
8181
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
8282
/// <param name="lossFunction">The <a href="https://en.wikipedia.org/wiki/Loss_function">loss</a> function minimized in the training process. Using, for example, <see cref="HingeLoss"/> leads to a support vector machine trainer.</param>
8383
/// <param name="numberOfIterations">The maximum number of passes through the training dataset; set to 1 to simulate online learning.</param>
@@ -106,7 +106,7 @@ public static SgdNonCalibratedTrainer SgdNonCalibrated(this BinaryClassification
106106
}
107107

108108
/// <summary>
109-
/// Predict a target using a linear classification model trained with <see cref="SgdNonCalibratedTrainer"/> and advanced options.
109+
/// Create <see cref="Trainers.SgdNonCalibratedTrainer"/> with advanced options, which predicts a target using a linear classification model.
110110
/// Stochastic gradient descent (SGD) is an iterative algorithm that optimizes a differentiable objective function.
111111
/// </summary>
112112
/// <param name="catalog">The binary classification catalog trainer object.</param>

0 commit comments

Comments
 (0)