diff --git a/docs/api-reference/algo-details-fasttree.md b/docs/api-reference/algo-details-fasttree.md new file mode 100644 index 0000000000..dd14727d22 --- /dev/null +++ b/docs/api-reference/algo-details-fasttree.md @@ -0,0 +1,38 @@ +### Training Algorithm Details +FastTree is an efficient implementation of the +[MART](https://arxiv.org/abs/1505.01866) gradient boosting algorithm. Gradient +boosting is a machine learning technique for regression problems. It builds each +regression tree in a step-wise fashion, using a predefined loss function to +measure the error for each step and corrects for it in the next. So this +prediction model is actually an ensemble of weaker prediction models. In +regression problems, boosting builds a series of such trees in a step-wise +fashion and then selects the optimal tree using an arbitrary differentiable loss +function. + +MART learns an ensemble of regression trees, each of which is a decision tree with +scalar values in its leaves. A decision (or regression) tree is a binary +tree-like flow chart, where at each interior node one decides which of the two +child nodes to continue to based on one of the feature values from the input. At +each leaf node, a value is returned. In the interior nodes, the decision is +based on the test x <= v where x is the value of the feature in the input +sample and v is one of the possible values of this feature. The functions that +can be produced by a regression tree are all the piece-wise constant functions. + +The ensemble of trees is produced by computing, in each step, a regression tree +that approximates the gradient of the loss function, and adding it to the +previous tree with coefficients that minimize the loss of the new tree. The +output of the ensemble produced by MART on a given instance is the sum of the +tree outputs. + +* In case of a binary classification problem, the output is converted to a + probability by using some form of calibration. 
+* In case of a regression problem, the output is the predicted value of the + function. +* In case of a ranking problem, the instances are ordered by the output value of + the ensemble. + +For more information see: +* [Wikipedia: Gradient boosting (Gradient tree +boosting).](https://en.wikipedia.org/wiki/Gradient_boosting#Gradient_tree_boosting) +* [Greedy function approximation: A gradient boosting +machine.](https://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451) \ No newline at end of file diff --git a/src/Microsoft.ML.FastTree/FastTreeArguments.cs b/src/Microsoft.ML.FastTree/FastTreeArguments.cs index ddea3f21f8..8fdf4ae353 100644 --- a/src/Microsoft.ML.FastTree/FastTreeArguments.cs +++ b/src/Microsoft.ML.FastTree/FastTreeArguments.cs @@ -55,7 +55,8 @@ public enum EarlyStoppingRankingMetric public sealed partial class FastTreeBinaryTrainer { /// - /// Options for the . + /// Options for the as used in + /// [FastTree(Options)](xref:Microsoft.ML.TreeExtensions.FastTree(Microsoft.ML.BinaryClassificationCatalog.BinaryClassificationTrainers,Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options)). /// [TlcModule.Component(Name = LoadNameValue, FriendlyName = UserNameValue, Desc = Summary)] public sealed class Options : BoostedTreeOptions, IFastTreeTrainerFactory diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs index 78e0911957..19c6e37714 100644 --- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs +++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs @@ -100,7 +100,28 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// /// The for training a decision tree binary classification model using FastTree. 
/// - /// + /// + /// + /// + /// + /// + /// + /// public sealed partial class FastTreeBinaryTrainer : BoostingFastTreeTrainerBase>, diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 906cda14a7..df34a0f6fa 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -124,9 +124,9 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad /// | | | /// | -- | -- | /// | Machine learning task | Binary classification | - /// | Is normalization required? | Yes | + /// | Is normalization required? | No | /// | Is caching required? | No | - /// | Required NuGet in addition to Microsoft.ML | None | + /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree | /// /// [!include[algorithm](~/../docs/samples/docs/api-reference/algo-details-fastforest.md)] /// ]]> diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs index 2c487819ad..1dbf267c3a 100644 --- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs +++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs @@ -255,7 +255,7 @@ ISchemaBindableMapper IQuantileRegressionPredictor.CreateMapper(Double[] quantil /// | | | /// | -- | -- | /// | Machine learning task | Regression | - /// | Is normalization required? | Yes | + /// | Is normalization required? | No | /// | Is caching required? 
| No | /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.FastTree | /// diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index fb1fb69cfb..7d752adb8d 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -68,11 +68,11 @@ public static FastTreeRegressionTrainer FastTree(this RegressionCatalog.Regressi } /// - /// Predict a target using a decision tree binary classification model trained with the . + /// Create , which predicts a target using a decision tree binary classification model. /// /// The . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. The column data must be . + /// The name of the feature column. The column data must be a known-sized vector of . /// The name of the example weight column (optional). /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -100,7 +100,7 @@ public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.Bi } /// - /// Predict a target using a decision tree binary classification model trained with the and advanced options. + /// Create with advanced options, which predicts a target using a decision tree binary classification model. /// /// The . /// Trainer options.