From 36186c189213fc26ffa80a3fc5a50890056b3144 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Thu, 18 Apr 2019 02:51:08 -0700 Subject: [PATCH 1/7] XML documentation for FastForest binary classification. --- .../RandomForestClassification.cs | 46 ++++++++++++++++++- .../TreeTrainersCatalog.cs | 8 ++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index f226d0024f..28032c7ecb 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -113,12 +113,54 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// /// The for training a decision tree binary classification model using Fast Forest. /// - /// + /// + /// + /// + /// + /// + /// + /// public sealed partial class FastForestBinaryTrainer : RandomForestTrainerBase, FastForestBinaryModelParameters> { /// - /// Options for the . + /// Options for the as used in + /// [FastForest(Options)](xref:Microsoft.ML.TreeExtensions.FastForest(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastForestBinaryTrainer.Options)). /// public sealed class Options : FastForestOptionsBase { diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index e948d7363b..b365058b48 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -384,11 +384,11 @@ public static FastForestRegressionTrainer FastForest(this RegressionCatalog.Regr } /// - /// Predict a target using a decision tree regression model trained with the . + /// Creates , which predicts a target using a decision tree regression model. /// /// The . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. 
The column data must be . + /// The name of the feature column. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -414,7 +414,7 @@ public static FastForestBinaryTrainer FastForest(this BinaryClassificationCatalo } /// - /// Predict a target using a decision tree regression model trained with the and advanced options. + /// Creates , which predicts a target using a decision tree regression model with advanced options. /// /// The . /// Trainer options. From 4b106f1d1cdea460e6ab3ba39a38e33d023a6e91 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:49:10 -0700 Subject: [PATCH 2/7] PR feedback. --- docs/api-reference/fastforest.md | 31 ++++++++++++++++++ .../RandomForestClassification.cs | 32 ++----------------- .../TreeTrainersCatalog.cs | 4 +-- 3 files changed, 35 insertions(+), 32 deletions(-) create mode 100644 docs/api-reference/fastforest.md diff --git a/docs/api-reference/fastforest.md b/docs/api-reference/fastforest.md new file mode 100644 index 0000000000..fb9998d93d --- /dev/null +++ b/docs/api-reference/fastforest.md @@ -0,0 +1,31 @@ +### Training Algorithm Details +Decision trees are non-parametric models that perform a sequence of simple tests +on inputs. This decision procedure maps them to outputs found in the training +dataset whose inputs were similar to the instance being processed. A decision is +made at each node of the binary tree data structure based on a measure of +similarity that maps each instance recursively through the branches of the tree +until the appropriate leaf node is reached and the output decision returned. + +Decision trees have several advantages: +* They are efficient in both computation and memory usage during training and + prediction. +* They can represent non-linear decision boundaries. 
+* They perform integrated feature selection and classification. +* They are resilient in the presence of noisy features. + +Fast forest is a random forest implementation. The model consists of an ensemble +of decision trees. Each tree in a decision forest outputs a Gaussian +distribution by way of prediction. An aggregation is performed over the ensemble +of trees to find a Gaussian distribution closest to the combined distribution +for all trees in the model. This decision forest classifier consists of an +ensemble of decision trees. + +Generally, ensemble models provide better coverage and accuracy than single +decision trees. Each tree in a decision forest outputs a Gaussian distribution. + +For more information, see: +* [Wikipedia: Random forest](https://en.wikipedia.org/wiki/Random_forest) +* [Quantile regression + forest](http://jmlr.org/papers/volume7/meinshausen06a/meinshausen06a.pdf) +* [From Stumps to Trees to + Forests](https://blogs.technet.microsoft.com/machinelearning/2014/09/10/from-stumps-to-trees-to-forests/) \ No newline at end of file diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 28032c7ecb..b4421b7bb4 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -119,36 +119,8 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// or [FastForest(Options)](xref:Microsoft.ML.TreeExtensions.FastForest(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastForestBinaryTrainer.Options)). /// /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] - /// - /// ### Trainer Characteristics - /// | | | - /// | -- | -- | - /// | Machine learning task | Binary classification | - /// | Is normalization required? | Yes | - /// | Is caching required? 
| No | - /// | Required NuGet in addition to Microsoft.ML | None | - /// - /// ### Training Algorithm Details - /// Decision trees are non-parametric models that perform a sequence of simple tests on inputs. - /// This decision procedure maps them to outputs found in the training dataset whose inputs were similar to the instance being processed. - /// A decision is made at each node of the binary tree data structure based on a measure of similarity that maps each instance recursively through the branches of the tree until the appropriate leaf node is reached and the output decision returned. - /// - /// Decision trees have several advantages: - /// They are efficient in both computation and memory usage during training and prediction. - /// They can represent non-linear decision boundaries. - /// They perform integrated feature selection and classification. - /// They are resilient in the presence of noisy features. - /// - /// Fast forest is a random forest implementation. - /// The model consists of an ensemble of decision trees. Each tree in a decision forest outputs a Gaussian distribution by way of prediction. - /// An aggregation is performed over the ensemble of trees to find a Gaussian distribution closest to the combined distribution for all trees in the model. - /// This decision forest classifier consists of an ensemble of decision trees. - /// - /// Generally, ensemble models provide better coverage and accuracy than single decision trees. Each tree in a decision forest outputs a Gaussian distribution. 
- /// For more see: - /// [Wikipedia: Random forest](https://en.wikipedia.org/wiki/Random_forest) - /// [Quantile regression forest](http://jmlr.org/papers/volume7/meinshausen06a/meinshausen06a.pdf) - /// [From Stumps to Trees to Forests](https://blogs.technet.microsoft.com/machinelearning/2014/09/10/from-stumps-to-trees-to-forests/) + /// + /// [!include[io](~/../docs/samples/docs/api-reference/fastforest.md)] /// ]]> /// /// diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index b365058b48..c556957d58 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -388,7 +388,7 @@ public static FastForestRegressionTrainer FastForest(this RegressionCatalog.Regr /// /// The . /// The name of the label column. The column data must be . - /// The name of the feature column. The column data must be a known-sized vector of . + /// The name of the feature column. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -414,7 +414,7 @@ public static FastForestBinaryTrainer FastForest(this BinaryClassificationCatalo } /// - /// Creates , which predicts a target using a decision tree regression model with advanced options. + /// Create with , which predicts a target using a decision tree regression model. /// /// The . /// Trainer options. From 65790907adfe5bd65fe4284368ba0255bf644f6f Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:49:29 -0700 Subject: [PATCH 3/7] ws. 
--- src/Microsoft.ML.FastTree/RandomForestClassification.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index b4421b7bb4..e3f599659d 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -119,7 +119,7 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// or [FastForest(Options)](xref:Microsoft.ML.TreeExtensions.FastForest(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastForestBinaryTrainer.Options)). /// /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] - /// + /// /// [!include[io](~/../docs/samples/docs/api-reference/fastforest.md)] /// ]]> /// From b29d3530484cf8f17636249ca3670f632c32b219 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:51:11 -0700 Subject: [PATCH 4/7] PR feedback. --- src/Microsoft.ML.FastTree/RandomForestClassification.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index e3f599659d..6ebeb6bd0e 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -120,7 +120,7 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] /// - /// [!include[io](~/../docs/samples/docs/api-reference/fastforest.md)] + /// [!include[algorithm](~/../docs/samples/docs/api-reference/fastforest.md)] /// ]]> /// /// From 3c1a0089ea064153a89be96218974a93db8759f5 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:52:38 -0700 Subject: [PATCH 5/7] PR feedback. 
--- src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index c556957d58..2837a176e6 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -384,7 +384,7 @@ public static FastForestRegressionTrainer FastForest(this RegressionCatalog.Regr } /// - /// Creates , which predicts a target using a decision tree regression model. + /// Create <see cref="FastForestBinaryTrainer"/>, which predicts a target using a decision tree binary classification model. /// /// The . /// The name of the label column. The column data must be . From 57b223a6a896324095df504524165583634cdc42 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:55:07 -0700 Subject: [PATCH 6/7] PR feedback. --- src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index 2837a176e6..c7b3014000 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -414,7 +414,7 @@ public static FastForestBinaryTrainer FastForest(this BinaryClassificationCatalo } /// - /// Create with , which predicts a target using a decision tree regression model. + /// Create <see cref="FastForestBinaryTrainer"/> with advanced options, which predicts a target using a decision tree binary classification model. /// /// The . /// Trainer options. From 521dd6f53cf3eefb0c7268e0b223a06919517104 Mon Sep 17 00:00:00 2001 From: Zeeshan Siddiqui Date: Fri, 19 Apr 2019 11:59:49 -0700 Subject: [PATCH 7/7] PR feedback. 
--- .../api-reference/{fastforest.md => algo-details-fastforest.md} | 0 src/Microsoft.ML.FastTree/RandomForestClassification.cs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/api-reference/{fastforest.md => algo-details-fastforest.md} (100%) diff --git a/docs/api-reference/fastforest.md b/docs/api-reference/algo-details-fastforest.md similarity index 100% rename from docs/api-reference/fastforest.md rename to docs/api-reference/algo-details-fastforest.md diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 6ebeb6bd0e..92b6f7cc1d 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -120,7 +120,7 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-binary-classification.md)] /// - /// [!include[algorithm](~/../docs/samples/docs/api-reference/fastforest.md)] + /// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fastforest.md)] /// ]]> /// ///