diff --git a/docs/api-reference/algo-details-fastforest.md b/docs/api-reference/algo-details-fastforest.md new file mode 100644 index 0000000000..fb9998d93d --- /dev/null +++ b/docs/api-reference/algo-details-fastforest.md @@ -0,0 +1,31 @@ +### Training Algorithm Details +Decision trees are non-parametric models that perform a sequence of simple tests +on inputs. This decision procedure maps them to outputs found in the training +dataset whose inputs were similar to the instance being processed. A decision is +made at each node of the binary tree data structure based on a measure of +similarity that maps each instance recursively through the branches of the tree +until the appropriate leaf node is reached and the output decision returned. + +Decision trees have several advantages: +* They are efficient in both computation and memory usage during training and + prediction. +* They can represent non-linear decision boundaries. +* They perform integrated feature selection and classification. +* They are resilient in the presence of noisy features. + +Fast forest is a random forest implementation. The model consists of an ensemble +of decision trees. Each tree in a decision forest outputs a Gaussian +distribution by way of prediction. An aggregation is performed over the ensemble +of trees to find a Gaussian distribution closest to the combined distribution +for all trees in the model. This decision forest classifier consists of an +ensemble of decision trees. + +Generally, ensemble models provide better coverage and accuracy than single +decision trees. Each tree in a decision forest outputs a Gaussian distribution. 
+ +For more information, see: +* [Wikipedia: Random forest](https://en.wikipedia.org/wiki/Random_forest) +* [Quantile regression + forest](http://jmlr.org/papers/volume7/meinshausen06a/meinshausen06a.pdf) +* [From Stumps to Trees to + Forests](https://blogs.technet.microsoft.com/machinelearning/2014/09/10/from-stumps-to-trees-to-forests/) \ No newline at end of file diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index f226d0024f..92b6f7cc1d 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -113,12 +113,26 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// /// The for training a decision tree binary classification model using Fast Forest. /// - /// + /// + /// + /// + /// + /// + /// + /// public sealed partial class FastForestBinaryTrainer : RandomForestTrainerBase, FastForestBinaryModelParameters> { /// - /// Options for the . + /// Options for the <see cref="FastForestBinaryTrainer"/> as used in + /// [FastForest(Options)](xref:Microsoft.ML.TreeExtensions.FastForest(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastForestBinaryTrainer.Options)). /// public sealed class Options : FastForestOptionsBase { diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index e948d7363b..c7b3014000 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -384,11 +384,11 @@ public static FastForestRegressionTrainer FastForest(this RegressionCatalog.Regr } /// - /// Predict a target using a decision tree regression model trained with the . + /// Create <see cref="FastForestBinaryTrainer"/>, which predicts a target using a decision tree binary classification model. /// /// The . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. The column data must be <see cref="System.Boolean"/>.
+ /// The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>. /// The name of the example weight column (optional). /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -414,7 +414,7 @@ public static FastForestBinaryTrainer FastForest(this BinaryClassificationCatalo } /// - /// Predict a target using a decision tree regression model trained with the and advanced options. + /// Create <see cref="FastForestBinaryTrainer"/> with advanced options, which predicts a target using a decision tree binary classification model. /// /// The . /// Trainer options.