Skip to content

Commit f51ecdb

Browse files
authored
XML documentation for FastTree binary classification. (#3398)
* XML documentation for FastTree binary classification. * PR feedback. * PR feedback.
1 parent df3efa5 commit f51ecdb

File tree

6 files changed

+69
-9
lines changed

6 files changed

+69
-9
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
### Training Algorithm Details
2+
FastTree is an efficient implementation of the
3+
[MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm. Gradient
4+
boosting is a machine learning technique for regression problems. It builds each
5+
regression tree in a step-wise fashion, using a predefined loss function to
6+
measure the error for each step and correct for it in the next. So this
7+
prediction model is actually an ensemble of weaker prediction models. In
8+
regression problems, boosting builds a series of such trees in a step-wise
9+
fashion and then selects the optimal tree using an arbitrary differentiable loss
10+
function.
11+
12+
MART learns an ensemble of regression trees, each of which is a decision tree with
13+
scalar values in its leaves. A decision (or regression) tree is a binary
14+
tree-like flow chart, where at each interior node one decides which of the two
15+
child nodes to continue to based on one of the feature values from the input. At
16+
each leaf node, a value is returned. In the interior nodes, the decision is
17+
based on the test x <= v where x is the value of the feature in the input
18+
sample and v is one of the possible values of this feature. The functions that
19+
can be produced by a regression tree are all the piece-wise constant functions.
20+
21+
The ensemble of trees is produced by computing, in each step, a regression tree
22+
that approximates the gradient of the loss function, and adding it to the
23+
previous tree with coefficients that minimize the loss of the new tree. The
24+
output of the ensemble produced by MART on a given instance is the sum of the
25+
tree outputs.
26+
27+
* In case of a binary classification problem, the output is converted to a
28+
probability by using some form of calibration.
29+
* In case of a regression problem, the output is the predicted value of the
30+
function.
31+
* In case of a ranking problem, the instances are ordered by the output value of
32+
the ensemble.
33+
34+
For more information see:
35+
* [Wikipedia: Gradient boosting (Gradient tree
36+
boosting).](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting)
37+
* [Greedy function approximation: A gradient boosting
38+
machine.](https://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451)

src/Microsoft.ML.FastTree/FastTreeArguments.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ public enum EarlyStoppingRankingMetric
5555
public sealed partial class FastTreeBinaryTrainer
5656
{
5757
/// <summary>
58-
/// Options for the <see cref="FastTreeBinaryTrainer"/>.
58+
/// Options for the <see cref="FastTreeBinaryTrainer"/> as used in
59+
/// [FastTree(Options)](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
5960
/// </summary>
6061
[TlcModule.Component(Name = LoadNameValue, FriendlyName = UserNameValue, Desc = Summary)]
6162
public sealed class Options : BoostedTreeOptions, IFastTreeTrainerFactory

src/Microsoft.ML.FastTree/FastTreeClassification.cs

+22-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,28 @@ private static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoad
100100
/// <summary>
101101
/// The <see cref="IEstimator{TTransformer}"/> for training a decision tree binary classification model using FastTree.
102102
/// </summary>
103-
/// <include file='doc.xml' path='doc/members/member[@name="FastTree_remarks"]/*' />
103+
/// <remarks>
104+
/// <format type="text/markdown"><![CDATA[
105+
/// To create this trainer, use [FastTree](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Double))
106+
/// or [FastTree(Options)](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
107+
///
108+
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
109+
///
110+
/// ### Trainer Characteristics
111+
/// | | |
112+
/// | -- | -- |
113+
/// | Machine learning task | Binary classification |
114+
/// | Is normalization required? | No |
115+
/// | Is caching required? | No |
116+
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
117+
///
118+
/// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fasttree.md)]
119+
/// ]]>
120+
/// </format>
121+
/// </remarks>
122+
/// <seealso cref="Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Double)"/>
123+
/// <seealso cref="Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)"/>
124+
/// <seealso cref="Options"/>
104125
public sealed partial class FastTreeBinaryTrainer :
105126
BoostingFastTreeTrainerBase<FastTreeBinaryTrainer.Options,
106127
BinaryPredictionTransformer<CalibratedModelParametersBase<FastTreeBinaryModelParameters, PlattCalibrator>>,

src/Microsoft.ML.FastTree/RandomForestClassification.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ private static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoad
124124
/// | | |
125125
/// | -- | -- |
126126
/// | Machine learning task | Binary classification |
127-
/// | Is normalization required? | Yes |
127+
/// | Is normalization required? | No |
128128
/// | Is caching required? | No |
129-
/// | Required NuGet in addition to Microsoft.ML | None |
129+
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
130130
///
131131
/// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fastforest.md)]
132132
/// ]]>

src/Microsoft.ML.FastTree/RandomForestRegression.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ ISchemaBindableMapper IQuantileRegressionPredictor.CreateMapper(Double[] quantil
255255
/// | | |
256256
/// | -- | -- |
257257
/// | Machine learning task | Regression |
258-
/// | Is normalization required? | Yes |
258+
/// | Is normalization required? | No |
259259
/// | Is caching required? | No |
260260
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
261261
///

src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,11 @@ public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.Regressi
6868
}
6969

7070
/// <summary>
71-
/// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/>.
71+
/// Create <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model.
7272
/// </summary>
7373
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
74-
/// <param name="labelColumnName">The name of the label column.</param>
75-
/// <param name="featureColumnName">The name of the feature column.</param>
74+
/// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Boolean"/>.</param>
75+
/// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
7676
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
7777
/// <param name="numberOfTrees">Total number of decision trees to create in the ensemble.</param>
7878
/// <param name="numberOfLeaves">The maximum number of leaves per decision tree.</param>
@@ -100,7 +100,7 @@ public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.Bi
100100
}
101101

102102
/// <summary>
103-
/// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/> and advanced options.
103+
/// Create <see cref="FastTreeBinaryTrainer"/> with advanced options, which predicts a target using a decision tree binary classification model.
104104
/// </summary>
105105
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
106106
/// <param name="options">Trainer options.</param>

0 commit comments

Comments
 (0)