From d171a6a90941ede4de8ebbb445015fb39a45a489 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Thu, 18 Apr 2019 02:27:58 -0700
Subject: [PATCH 1/3] XML documentation for FastTree binary classification.

---
 .../FastTreeClassification.cs                 | 42 ++++++++++++++++++-
 .../TreeTrainersCatalog.cs                    |  8 ++--
 2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
index 78e0911957..7e58d1c776 100644
--- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
@@ -100,7 +100,47 @@ private static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoad
     /// <summary>
     /// The <see cref="IEstimator{TTransformer}"/> for training a decision tree binary classification model using FastTree.
     /// </summary>
-    /// <include file='doc.xml' path='doc/members/member[@name="FastTree_remarks"]/*' />
+    /// <remarks>
+    /// <format type="text/markdown"><![CDATA[
+    /// To create this trainer, use [FastTree](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Double))
+    /// or [FastTree(Options)](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
+    ///
+    /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)]
+    ///
+    /// ### Trainer Characteristics
+    /// |  |  |
+    /// | -- | -- |
+    /// | Machine learning task | Binary classification |
+    /// | Is normalization required? | Yes |
+    /// | Is caching required? | No |
+    /// | Required NuGet in addition to Microsoft.ML | None |
+    ///
+    /// ### Training Algorithm Details
+    /// FastTree is an efficient implementation of the [MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm.
+    /// Gradient boosting is a machine learning technique for regression problems.
+    /// It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
+    /// So this prediction model is actually an ensemble of weaker prediction models.In regression problems, boosting builds a series of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
+    ///
+    /// MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
+    /// A decision(or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
+    /// At each leaf node, a value is returned.In the interior nodes, the decision is based on the test 'x &lt;= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
+    /// The functions that can be produced by a regression tree are all the piece-wise constant functions.
+    ///
+    /// The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
+    /// The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
+    /// In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
+    /// In case of a regression problem, the output is the predicted value of the function.
+    /// In case of a ranking problem, the instances are ordered by the output value of the ensemble.
+    ///
+    /// For more information see:
+    /// [Wikipedia: Gradient boosting (Gradient tree boosting)](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting).
+    /// [Greedy function approximation: A gradient boosting machine.](https://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451)
+    /// ]]>
+    /// </format>
+    /// </remarks>
+    /// <seealso cref="Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,System.String,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Double)"/>
+    /// <seealso cref="Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)"/>
+    /// <seealso cref="Options"/>
     public sealed partial class FastTreeBinaryTrainer :
         BoostingFastTreeTrainerBase<FastTreeBinaryTrainer.Options,
         BinaryPredictionTransformer<CalibratedModelParametersBase<FastTreeBinaryModelParameters, PlattCalibrator>>,
diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
index fb1fb69cfb..1dd610d8c9 100644
--- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
+++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
@@ -68,11 +68,11 @@ public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.Regressi
         }
 
         /// <summary>
-        /// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/>.
+        /// Creates a <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
-        /// <param name="labelColumnName">The name of the label column.</param>
-        /// <param name="featureColumnName">The name of the feature column.</param>
+        /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Boolean"/></param>
+        /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/></param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="numberOfTrees">Total number of decision trees to create in the ensemble.</param>
         /// <param name="numberOfLeaves">The maximum number of leaves per decision tree.</param>
@@ -100,7 +100,7 @@ public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.Bi
         }
 
         /// <summary>
-        /// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/> and advanced options.
+        /// Creates a <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model using advanced options.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="options">Trainer options.</param>

From a2a73839d627baf983041144c7e7332dacb85ef5 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Fri, 19 Apr 2019 14:41:13 -0700
Subject: [PATCH 2/3] PR feedback.

---
 docs/api-reference/algo-details-fasttree.md   | 39 +++++++++++++++++++
 .../FastTreeArguments.cs                      |  6 ++-
 .../FastTreeClassification.cs                 | 25 ++----------
 .../RandomForestClassification.cs             |  4 +-
 .../RandomForestRegression.cs                 |  2 +-
 .../TreeTrainersCatalog.cs                    |  8 ++--
 6 files changed, 53 insertions(+), 31 deletions(-)
 create mode 100644 docs/api-reference/algo-details-fasttree.md

diff --git a/docs/api-reference/algo-details-fasttree.md b/docs/api-reference/algo-details-fasttree.md
new file mode 100644
index 0000000000..b111faa96c
--- /dev/null
+++ b/docs/api-reference/algo-details-fasttree.md
@@ -0,0 +1,39 @@
+### Training Algorithm Details
+FastTree is an efficient implementation of the
+[MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm. Gradient
+boosting is a machine learning technique for regression problems. It builds each
+regression tree in a step-wise fashion, using a predefined loss function to
+measure the error for each step and corrects for it in the next. So this
+prediction model is actually an ensemble of weaker prediction models. In
+regression problems, boosting builds a series of such trees in a step-wise
+fashion and then selects the optimal tree using an arbitrary differentiable loss
+function.
+
+MART learns an ensemble of regression trees, each a decision tree with
+scalar values in its leaves. A decision (or regression) tree is a binary
+tree-like flow chart, where at each interior node one decides which of the two
+child nodes to continue to based on one of the feature values from the input. At
+each leaf node, a value is returned. In the interior nodes, the decision is
+based on the test 'x &lt;= v' where x is the value of the feature in the input
+sample and v is one of the possible values of this feature. The functions that
+can be produced by a regression tree are all the piece-wise constant functions.
+          
+The ensemble of trees is produced by computing, in each step, a regression tree
+that approximates the gradient of the loss function, and adding it to the
+previous tree with coefficients that minimize the loss of the new tree. The
+output of the ensemble produced by MART on a given instance is the sum of the
+tree outputs.
+
+* In case of a binary classification problem, the output is converted to a
+  probability by using some form of calibration.>
+* In case of a regression problem, the output is the predicted value of the
+  function.
+* In case of a ranking problem, the instances are ordered by the output value of
+  the ensemble.
+
+For more information see:
+
+[Wikipedia: Gradient boosting (Gradient tree
+boosting).](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting)
+[Greedy function approximation: A gradient boosting
+machine.](https://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451)
\ No newline at end of file
diff --git a/src/Microsoft.ML.FastTree/FastTreeArguments.cs b/src/Microsoft.ML.FastTree/FastTreeArguments.cs
index ddea3f21f8..70ff4ecb77 100644
--- a/src/Microsoft.ML.FastTree/FastTreeArguments.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeArguments.cs
@@ -55,14 +55,16 @@ public enum EarlyStoppingRankingMetric
     public sealed partial class FastTreeBinaryTrainer
     {
         /// <summary>
-        /// Options for the <see cref="FastTreeBinaryTrainer"/>.
+        /// Options for the <see cref="FastTreeBinaryTrainer"/> as used in
+        /// [FastTree(Options)](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
         /// </summary>
         [TlcModule.Component(Name = LoadNameValue, FriendlyName = UserNameValue, Desc = Summary)]
         public sealed class Options : BoostedTreeOptions, IFastTreeTrainerFactory
         {
 
             /// <summary>
-            /// Whether to use derivatives optimized for unbalanced training data.
+            /// Options for the <see cref="FastTreeBinaryTrainer"/> as used in
+            /// [FastTree(Options)](xref:"Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
             /// </summary>
             [Argument(ArgumentType.LastOccurenceWins, HelpText = "Option for using derivatives optimized for unbalanced sets", ShortName = "us")]
             [TGUI(Label = "Optimize for unbalanced")]
diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
index 7e58d1c776..19c6e37714 100644
--- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs
@@ -111,30 +111,11 @@ private static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoad
     /// |  |  |
     /// | -- | -- |
     /// | Machine learning task | Binary classification |
-    /// | Is normalization required? | Yes |
+    /// | Is normalization required? | No |
     /// | Is caching required? | No |
-    /// | Required NuGet in addition to Microsoft.ML | None |
+    /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
     ///
-    /// ### Training Algorithm Details
-    /// FastTree is an efficient implementation of the [MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm.
-    /// Gradient boosting is a machine learning technique for regression problems.
-    /// It builds each regression tree in a step-wise fashion, using a predefined loss function to measure the error for each step and corrects for it in the next.
-    /// So this prediction model is actually an ensemble of weaker prediction models.In regression problems, boosting builds a series of such trees in a step-wise fashion and then selects the optimal tree using an arbitrary differentiable loss function.
-    ///
-    /// MART learns an ensemble of regression trees, which is a decision tree with scalar values in its leaves.
-    /// A decision(or regression) tree is a binary tree-like flow chart, where at each interior node one decides which of the two child nodes to continue to based on one of the feature values from the input.
-    /// At each leaf node, a value is returned.In the interior nodes, the decision is based on the test 'x &lt;= v' where x is the value of the feature in the input sample and v is one of the possible values of this feature.
-    /// The functions that can be produced by a regression tree are all the piece-wise constant functions.
-    ///
-    /// The ensemble of trees is produced by computing, in each step, a regression tree that approximates the gradient of the loss function, and adding it to the previous tree with coefficients that minimize the loss of the new tree.
-    /// The output of the ensemble produced by MART on a given instance is the sum of the tree outputs.
-    /// In case of a binary classification problem, the output is converted to a probability by using some form of calibration.
-    /// In case of a regression problem, the output is the predicted value of the function.
-    /// In case of a ranking problem, the instances are ordered by the output value of the ensemble.
-    ///
-    /// For more information see:
-    /// [Wikipedia: Gradient boosting (Gradient tree boosting)](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting).
-    /// [Greedy function approximation: A gradient boosting machine.](https://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451)
+    /// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fasttree.md)]
     /// ]]>
     /// </format>
     /// </remarks>
diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs
index 906cda14a7..df34a0f6fa 100644
--- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs
+++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs
@@ -124,9 +124,9 @@ private static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoad
     /// |  |  |
     /// | -- | -- |
     /// | Machine learning task | Binary classification |
-    /// | Is normalization required? | Yes |
+    /// | Is normalization required? | No |
     /// | Is caching required? | No |
-    /// | Required NuGet in addition to Microsoft.ML | None |
+    /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
     ///
     /// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fastforest.md)]
     /// ]]>
diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs
index 2c487819ad..1dbf267c3a 100644
--- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs
+++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs
@@ -255,7 +255,7 @@ ISchemaBindableMapper IQuantileRegressionPredictor.CreateMapper(Double[] quantil
     /// |  |  |
     /// | -- | -- |
     /// | Machine learning task | Regression |
-    /// | Is normalization required? | Yes |
+    /// | Is normalization required? | No |
     /// | Is caching required? | No |
     /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree |
     ///
diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
index 1dd610d8c9..7d752adb8d 100644
--- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
+++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs
@@ -68,11 +68,11 @@ public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.Regressi
         }
 
         /// <summary>
-        /// Creates a <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model.
+        /// Create <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
-        /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Boolean"/></param>
-        /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/></param>
+        /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Boolean"/>.</param>
+        /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
         /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
         /// <param name="numberOfTrees">Total number of decision trees to create in the ensemble.</param>
         /// <param name="numberOfLeaves">The maximum number of leaves per decision tree.</param>
@@ -100,7 +100,7 @@ public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.Bi
         }
 
         /// <summary>
-        /// Creates a <see cref="FastTreeBinaryTrainer"/>, which predicts a target using a decision tree binary classification model using advanced options.
+        /// Create <see cref="FastTreeBinaryTrainer"/> with advanced options, which predicts a target using a decision tree binary classification model.
         /// </summary>
         /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
         /// <param name="options">Trainer options.</param>

From e62ef2835ae6914b7748be6d91f307d074ff8cd5 Mon Sep 17 00:00:00 2001
From: Zeeshan Siddiqui <mzs@microsoft.com>
Date: Fri, 19 Apr 2019 14:48:16 -0700
Subject: [PATCH 3/3] PR feedback.

---
 docs/api-reference/algo-details-fasttree.md    | 9 ++++-----
 src/Microsoft.ML.FastTree/FastTreeArguments.cs | 3 +--
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/docs/api-reference/algo-details-fasttree.md b/docs/api-reference/algo-details-fasttree.md
index b111faa96c..dd14727d22 100644
--- a/docs/api-reference/algo-details-fasttree.md
+++ b/docs/api-reference/algo-details-fasttree.md
@@ -14,7 +14,7 @@ scalar values in its leaves. A decision (or regression) tree is a binary
 tree-like flow chart, where at each interior node one decides which of the two
 child nodes to continue to based on one of the feature values from the input. At
 each leaf node, a value is returned. In the interior nodes, the decision is
-based on the test 'x &lt;= v' where x is the value of the feature in the input
+based on the test `x <= v` where x is the value of the feature in the input
 sample and v is one of the possible values of this feature. The functions that
 can be produced by a regression tree are all the piece-wise constant functions.
           
@@ -25,15 +25,14 @@ output of the ensemble produced by MART on a given instance is the sum of the
 tree outputs.
 
 * In case of a binary classification problem, the output is converted to a
-  probability by using some form of calibration.>
+  probability by using some form of calibration.
 * In case of a regression problem, the output is the predicted value of the
   function.
 * In case of a ranking problem, the instances are ordered by the output value of
   the ensemble.
 
 For more information see:
-
-[Wikipedia: Gradient boosting (Gradient tree
+* [Wikipedia: Gradient boosting (Gradient tree
 boosting).](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting)
-[Greedy function approximation: A gradient boosting
+* [Greedy function approximation: A gradient boosting
 machine.](https://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451)
\ No newline at end of file
diff --git a/src/Microsoft.ML.FastTree/FastTreeArguments.cs b/src/Microsoft.ML.FastTree/FastTreeArguments.cs
index 70ff4ecb77..8fdf4ae353 100644
--- a/src/Microsoft.ML.FastTree/FastTreeArguments.cs
+++ b/src/Microsoft.ML.FastTree/FastTreeArguments.cs
@@ -63,8 +63,7 @@ public sealed class Options : BoostedTreeOptions, IFastTreeTrainerFactory
         {
 
             /// <summary>
-            /// Options for the <see cref="FastTreeBinaryTrainer"/> as used in
-            /// [FastTree(Options)](xref:"Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)).
+            /// Whether to use derivatives optimized for unbalanced training data.
             /// </summary>
             [Argument(ArgumentType.LastOccurenceWins, HelpText = "Option for using derivatives optimized for unbalanced sets", ShortName = "us")]
             [TGUI(Label = "Optimize for unbalanced")]