### Training Algorithm Details
FastTree is an efficient implementation of the
[MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm. Gradient
boosting is a machine learning technique for regression problems. It builds each
regression tree in a step-wise fashion, using a predefined loss function to
measure the error at each step and correct for it in the next step. The
resulting prediction model is therefore an ensemble of weaker prediction models.
In regression problems, boosting builds a series of such trees in a step-wise
fashion and then selects the optimal tree using an arbitrary differentiable loss
function.

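Below is a minimal sketch of this step-wise procedure for the common case of
squared loss, where the negative gradient is simply the residual of the current
prediction. It uses scikit-learn's `DecisionTreeRegressor` as the weak learner
purely for illustration; it is not the FastTree implementation, and the function
names and parameters are hypothetical.

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

def boosted_fit(X, y, n_trees=100, learning_rate=0.1, max_depth=3):
    """Fit a gradient-boosted ensemble for regression with squared loss."""
    pred = np.zeros(len(y), dtype=float)
    trees = []
    for _ in range(n_trees):
        residual = y - pred                      # negative gradient of squared loss
        tree = DecisionTreeRegressor(max_depth=max_depth).fit(X, residual)
        pred += learning_rate * tree.predict(X)  # correct the error in the next step
        trees.append(tree)
    return trees

def boosted_predict(trees, X, learning_rate=0.1):
    # The ensemble prediction is the (scaled) sum of the individual tree outputs.
    return learning_rate * sum(tree.predict(X) for tree in trees)
```
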
MART learns an ensemble of regression trees, each of which is a decision tree
with scalar values in its leaves. A decision (or regression) tree is a binary
tree-like flow chart, where at each interior node one decides which of the two
child nodes to continue to based on one of the feature values from the input. At
each leaf node, a value is returned. In the interior nodes, the decision is
based on the test `x <= v`, where `x` is the value of a feature in the input
sample and `v` is one of the possible values of this feature. The functions that
can be produced by a regression tree are all piece-wise constant functions.

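The following is a small illustration of such a tree: interior nodes hold a
feature index and a threshold `v`, leaves hold scalar values, and evaluating the
tree walks the `x <= v` tests down to a leaf, yielding a piece-wise constant
function. The types and names are illustrative only.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class TreeNode:
    feature: Optional[int] = None       # index of the feature tested at this node
    threshold: float = 0.0              # the value v in the test x <= v
    left: Optional["TreeNode"] = None   # child taken when x[feature] <= threshold
    right: Optional["TreeNode"] = None  # child taken otherwise
    value: float = 0.0                  # scalar output when this node is a leaf

def tree_output(node: TreeNode, x) -> float:
    # Walk from the root to a leaf, following the x <= v decisions.
    while node.left is not None and node.right is not None:
        node = node.left if x[node.feature] <= node.threshold else node.right
    return node.value

# Example: a one-split tree returning -0.5 when x[0] <= 1.0 and 0.5 otherwise.
stump = TreeNode(feature=0, threshold=1.0,
                 left=TreeNode(value=-0.5), right=TreeNode(value=0.5))
print(tree_output(stump, [0.3]))  # -0.5
```
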
The ensemble of trees is produced by computing, at each step, a regression tree
that approximates the negative gradient of the loss function, and adding it to
the existing ensemble with a coefficient chosen to minimize the loss of the
updated ensemble. The output of the ensemble produced by MART on a given
instance is the sum of the tree outputs.

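Written out, this is the standard gradient tree boosting update described in the
references below; the notation here is generic and not specific to FastTree.
With `F_m` the ensemble after `m` trees and `h_m` the regression tree fitted to
the pseudo-residuals `(x_i, r_i)`:

```math
\begin{aligned}
r_i &= -\left[\frac{\partial L(y_i, F(x_i))}{\partial F(x_i)}\right]_{F = F_{m-1}},\\
\gamma_m &= \arg\min_{\gamma} \sum_i L\big(y_i,\; F_{m-1}(x_i) + \gamma\, h_m(x_i)\big),\\
F_m(x) &= F_{m-1}(x) + \gamma_m\, h_m(x).
\end{aligned}
```
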
* In the case of a binary classification problem, the output is converted to a
  probability by using some form of calibration (see the sketch after this
  list).
* In the case of a regression problem, the output is the predicted value of the
  function.
* In the case of a ranking problem, the instances are ordered by the output
  value of the ensemble.

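Below is a minimal sketch of how the raw ensemble score could be consumed for
each of these tasks. The logistic (Platt-style) calibrator and its parameters
`a` and `b` are hypothetical placeholders, not FastTree's actual calibration.

```python
import numpy as np

def to_probability(score, a=-1.0, b=0.0):
    # Map a raw ensemble score to a probability with a logistic (Platt-style)
    # calibrator; a and b are hypothetical calibration parameters.
    return 1.0 / (1.0 + np.exp(a * score + b))

def rank_instances(scores):
    # Ranking: order instance indices by descending ensemble score.
    return np.argsort(-np.asarray(scores))

# Regression: the raw ensemble score itself is the predicted value.
```
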
For more information see:
* [Wikipedia: Gradient boosting (Gradient tree
boosting).](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting)
* [Greedy function approximation: A gradient boosting
machine.](https://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451)