
Commit f829b34

XML docs for Permutation feature importance for Binary, Multi-class and Ranking trainers. (#3455)
* XML docs for Permutation feature importance for Binary, Multi-class and Ranking trainers.
* PR feedback.
* PR feedback.
* PR feedback.
* PR feedback.
* PR feedback.
* Add features column to io md file for ranker.
* PR feedback.
* PR feedback.
* PR feedback.
1 parent dc61418 commit f829b34


5 files changed: +22 −23 lines changed


docs/api-reference/io-columns-multiclass-classification.md

+1 −1
@@ -1,5 +1,5 @@
  ### Input and Output Columns
- The input label column data must be [key-typed](xref:Microsoft.ML.Data.KeyDataViewType) and the feature column must be a known-sized vector of <xref:System.Single>.
+ The input label column data must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type and the feature column must be a known-sized vector of <xref:System.Single>.

  This trainer outputs the following columns:
docs/api-reference/io-columns-ranking.md

+1 −2
@@ -1,6 +1,5 @@
  ### Input and Output Columns
- The input label column data must be <xref:System.Single> and input group column
- data must be <xref:System.UInt32>. This trainer outputs the following columns:
+ The input label data type must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type or <xref:System.Single>. The value of the label determines relevance, where higher values indicate higher relevance. If the label is a [key](xref:Microsoft.ML.Data.KeyDataViewType) type, then the key index is the relevance value, where the 0-index is the least relevant. If the label is a <xref:System.Single>, larger values indicate higher relevance. The feature column must be a known-sized vector of <xref:System.Single> and input row group column must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type. This trainer outputs the following columns:

  | Output Column Name | Column Type | Description|
  | -- | -- | -- |
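
A minimal sketch of preparing data that satisfies these requirements: a Single relevance label (larger means more relevant), a row group column converted to a key type, and a known-sized Single feature vector. All names and values here are hypothetical.

```csharp
using Microsoft.ML;

public static class RankingColumnsSketch
{
    // Hypothetical search-ranking row: relevance label, query/group id, numeric features.
    public class RankingRow
    {
        public float Label { get; set; }
        public uint GroupId { get; set; }
        public float Feature1 { get; set; }
        public float Feature2 { get; set; }
    }

    public static IDataView Prepare(MLContext mlContext)
    {
        var data = mlContext.Data.LoadFromEnumerable(new[]
        {
            new RankingRow { Label = 3f, GroupId = 1, Feature1 = 0.9f, Feature2 = 0.1f },
            new RankingRow { Label = 0f, GroupId = 1, Feature1 = 0.2f, Feature2 = 0.7f },
            new RankingRow { Label = 1f, GroupId = 2, Feature1 = 0.4f, Feature2 = 0.5f },
        });

        // The row group column must be key-typed; the features become a known-sized vector of Single.
        var pipeline = mlContext.Transforms.Conversion.MapValueToKey("GroupId")
            .Append(mlContext.Transforms.Concatenate("Features", "Feature1", "Feature2"));
        return pipeline.Fit(data).Transform(data);
    }
}
```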

src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs

+1 −1
@@ -127,7 +127,7 @@ public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.Bi
  /// Create a <see cref="FastTreeRankingTrainer"/>, which ranks a series of inputs based on their relevance, using a decision tree ranking model.
  /// </summary>
  /// <param name="catalog">The <see cref="RankingCatalog"/>.</param>
- /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/>.</param>
+ /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/> or <see cref="KeyDataViewType"/>.</param>
  /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
  /// <param name="rowGroupColumnName">The name of the group column.</param>
  /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
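
A short sketch of creating the trainer this doc comment describes; the column names are the catalog defaults and purely illustrative. The label column may hold Single values or a key type, as the updated parameter doc states.

```csharp
using Microsoft.ML;
using Microsoft.ML.Trainers.FastTree;

public static class FastTreeRankingSketch
{
    public static FastTreeRankingTrainer Create(MLContext mlContext) =>
        // "Label" may be Single (larger = more relevant) or key-typed (higher key index = more
        // relevant); "GroupId" must be key-typed and "Features" a known-sized vector of Single.
        mlContext.Ranking.Trainers.FastTree(
            labelColumnName: "Label",
            featureColumnName: "Features",
            rowGroupColumnName: "GroupId");
}
```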

src/Microsoft.ML.LightGbm/LightGbmCatalog.cs

+3 −3
@@ -123,7 +123,7 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.Bi
  /// Create <see cref="LightGbmRankingTrainer"/>, which predicts a target using a gradient boosting decision tree ranking model.
  /// </summary>
  /// <param name="catalog">The <see cref="RankingCatalog"/>.</param>
- /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/>.</param>
+ /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="System.Single"/> or <see cref="KeyDataViewType"/>.</param>
  /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
  /// <param name="rowGroupColumnName">The name of the group column.</param>
  /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
@@ -178,8 +178,8 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer
  /// Create <see cref="LightGbmMulticlassTrainer"/>, which predicts a target using a gradient boosting decision tree multiclass classification model.
  /// </summary>
  /// <param name="catalog">The <see cref="MulticlassClassificationCatalog"/>.</param>
- /// <param name="labelColumnName">The name of the label column.</param>
- /// <param name="featureColumnName">The name of the feature column.</param>
+ /// <param name="labelColumnName">The name of the label column. The column data must be <see cref="KeyDataViewType"/>.</param>
+ /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
  /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
  /// <param name="numberOfLeaves">The maximum number of leaves in one tree.</param>
  /// <param name="minimumExampleCountPerLeaf">The minimal number of data points required to form a new tree leaf.</param>
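
Illustrative creation calls matching the two doc comments above (ranking and multiclass LightGbm); the column names are the catalog defaults and hypothetical, and the columns are assumed to have been prepared as in the io-columns sketches earlier on this page.

```csharp
using Microsoft.ML;
using Microsoft.ML.Trainers.LightGbm;

public static class LightGbmSketch
{
    // Ranking: the label may be Single or key-typed; "GroupId" must be key-typed.
    public static LightGbmRankingTrainer CreateRanker(MLContext mlContext) =>
        mlContext.Ranking.Trainers.LightGbm(
            labelColumnName: "Label",
            featureColumnName: "Features",
            rowGroupColumnName: "GroupId");

    // Multiclass: the label must be key-typed (e.g. produced by MapValueToKey);
    // "Features" must be a known-sized vector of Single.
    public static LightGbmMulticlassTrainer CreateMulticlass(MLContext mlContext) =>
        mlContext.MulticlassClassification.Trainers.LightGbm(
            labelColumnName: "Label",
            featureColumnName: "Features");
}
```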

src/Microsoft.ML.Transforms/PermutationFeatureImportanceExtensions.cs

+16 −16
@@ -19,7 +19,7 @@ public static class PermutationFeatureImportanceExtensions
  {
  #region Regression
  /// <summary>
- /// Permutation Feature Importance (PFI) for Regression
+ /// Permutation Feature Importance (PFI) for Regression.
  /// </summary>
  /// <remarks>
  /// <para>
@@ -32,14 +32,14 @@ public static class PermutationFeatureImportanceExtensions
  /// <para>
  /// PFI works by taking a labeled dataset, choosing a feature, and permuting the values
  /// for that feature across all the examples, so that each example now has a random value for the feature and
- /// the original values for all other features. The evalution metric (e.g. R-squared) is then calculated
+ /// the original values for all other features. The evaluation metric (e.g. R-squared) is then calculated
  /// for this modified dataset, and the change in the evaluation metric from the original dataset is computed.
  /// The larger the change in the evaluation metric, the more important the feature is to the model.
  /// PFI works by performing this permutation analysis across all the features of a model, one after another.
  /// </para>
  /// <para>
  /// In this implementation, PFI computes the change in all possible regression evaluation metrics for each feature, and an
- /// <code>ImmutableArray</code> of <code>RegressionMetrics</code> objects is returned. See the sample below for an
+ /// <see cref="ImmutableArray"/> of <see cref="RegressionMetrics"/> objects is returned. See the sample below for an
  /// example of working with these results to analyze the feature importance of a model.
  /// </para>
  /// </remarks>
@@ -53,7 +53,7 @@ public static class PermutationFeatureImportanceExtensions
  /// <param name="catalog">The regression catalog.</param>
  /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
  /// <param name="data">The evaluation data set.</param>
- /// <param name="labelColumnName">Label column name.</param>
+ /// <param name="labelColumnName">Label column name. The column data must be <see cref="System.Single"/>.</param>
  /// <param name="useFeatureWeightFilter">Use features weight to pre-filter features.</param>
  /// <param name="numberOfExamplesToUse">Limit the number of examples to evaluate on. <cref langword="null"/> means up to ~2 bln examples from <paramref param="data"/> will be used.</param>
  /// <param name="permutationCount">The number of permutations to perform.</param>
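
An end-to-end sketch of the regression PFI workflow these docs describe: featurize, fit a regression trainer, hand the trained prediction transformer and the featurized data to PermutationFeatureImportance, then order feature slots by the change in R-squared. The row type, column names, and synthetic data are hypothetical, and the metric-statistics property shape (RSquared.Mean) is assumed to match the release in use.

```csharp
using System;
using System.Linq;
using Microsoft.ML;

// Hypothetical row type used only for this sketch.
public class HouseRow
{
    public float Price { get; set; }
    public float Size { get; set; }
    public float Age { get; set; }
}

public static class RegressionPfiSketch
{
    public static void Run()
    {
        var mlContext = new MLContext(seed: 0);
        var rand = new Random(0);
        var rows = Enumerable.Range(0, 100).Select(_ =>
        {
            var size = (float)rand.NextDouble() * 200f;
            var age = (float)rand.NextDouble() * 50f;
            return new HouseRow { Size = size, Age = age, Price = 3f * size - 0.5f * age };
        }).ToList();
        var data = mlContext.Data.LoadFromEnumerable(rows);

        // Featurize first, so both the trained prediction transformer and the
        // featurized data can be handed to PermutationFeatureImportance.
        var transformedData = mlContext.Transforms.Concatenate("Features", "Size", "Age")
            .Fit(data).Transform(data);
        var predictionTransformer = mlContext.Regression.Trainers
            .Sdca(labelColumnName: "Price", featureColumnName: "Features")
            .Fit(transformedData);

        var pfi = mlContext.Regression.PermutationFeatureImportance(
            predictionTransformer, transformedData, labelColumnName: "Price", permutationCount: 3);

        // One entry per feature slot; a larger drop in R-squared marks a more important feature.
        foreach (var (metrics, slot) in pfi.Select((m, i) => (m, i))
                     .OrderByDescending(t => Math.Abs(t.m.RSquared.Mean)))
        {
            Console.WriteLine($"Slot {slot}: change in R-squared (mean) = {metrics.RSquared.Mean:F4}");
        }
    }
}
```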
@@ -95,7 +95,7 @@ private static RegressionMetrics RegressionDelta(

  #region Binary Classification
  /// <summary>
- /// Permutation Feature Importance (PFI) for Binary Classification
+ /// Permutation Feature Importance (PFI) for Binary Classification.
  /// </summary>
  /// <remarks>
  /// <para>
@@ -108,14 +108,14 @@ private static RegressionMetrics RegressionDelta(
  /// <para>
  /// PFI works by taking a labeled dataset, choosing a feature, and permuting the values
  /// for that feature across all the examples, so that each example now has a random value for the feature and
- /// the original values for all other features. The evalution metric (e.g. AUC) is then calculated
+ /// the original values for all other features. The evaluation metric (e.g. AUC) is then calculated
  /// for this modified dataset, and the change in the evaluation metric from the original dataset is computed.
  /// The larger the change in the evaluation metric, the more important the feature is to the model.
  /// PFI works by performing this permutation analysis across all the features of a model, one after another.
  /// </para>
  /// <para>
  /// In this implementation, PFI computes the change in all possible binary classification evaluation metrics for each feature, and an
- /// <code>ImmutableArray</code> of <code>BinaryClassificationMetrics</code> objects is returned. See the sample below for an
+ /// <see cref="ImmutableArray"/> of <see cref="BinaryClassificationMetrics"/> objects is returned. See the sample below for an
  /// example of working with these results to analyze the feature importance of a model.
  /// </para>
  /// </remarks>
@@ -129,7 +129,7 @@ private static RegressionMetrics RegressionDelta(
  /// <param name="catalog">The binary classification catalog.</param>
  /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
  /// <param name="data">The evaluation data set.</param>
- /// <param name="labelColumnName">Label column name.</param>
+ /// <param name="labelColumnName">Label column name. The column data must be <see cref="System.Boolean"/>.</param>
  /// <param name="useFeatureWeightFilter">Use features weight to pre-filter features.</param>
  /// <param name="numberOfExamplesToUse">Limit the number of examples to evaluate on. <cref langword="null"/> means up to ~2 bln examples from <paramref param="data"/> will be used.</param>
  /// <param name="permutationCount">The number of permutations to perform.</param>
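
A compact sketch of consuming the binary-classification PFI results. The trained prediction transformer and the featurized data (with a Boolean label column) are assumed to be prepared as in the regression sketch above; the AreaUnderRocCurve.Mean shape is assumed to match the release in use.

```csharp
using System;
using System.Linq;
using Microsoft.ML;

public static class BinaryPfiSketch
{
    // 'predictionTransformer' is a trained binary model head and 'transformedData' is the
    // featurized evaluation set with a Boolean "Label" column (both assumed, prepared elsewhere).
    public static void Report<TModel>(
        MLContext mlContext,
        ISingleFeaturePredictionTransformer<TModel> predictionTransformer,
        IDataView transformedData)
        where TModel : class
    {
        var pfi = mlContext.BinaryClassification.PermutationFeatureImportance(
            predictionTransformer, transformedData, permutationCount: 3);

        // One entry per feature slot; a larger drop in AUC marks a more important feature.
        foreach (var (metrics, slot) in pfi.Select((m, i) => (m, i))
                     .OrderByDescending(t => Math.Abs(t.m.AreaUnderRocCurve.Mean)))
            Console.WriteLine($"Slot {slot}: change in AUC (mean) = {metrics.AreaUnderRocCurve.Mean:F4}");
    }
}
```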
@@ -175,7 +175,7 @@ private static BinaryClassificationMetrics BinaryClassifierDelta(

  #region Multiclass Classification
  /// <summary>
- /// Permutation Feature Importance (PFI) for MulticlassClassification
+ /// Permutation Feature Importance (PFI) for MulticlassClassification.
  /// </summary>
  /// <remarks>
  /// <para>
@@ -188,14 +188,14 @@ private static BinaryClassificationMetrics BinaryClassifierDelta(
  /// <para>
  /// PFI works by taking a labeled dataset, choosing a feature, and permuting the values
  /// for that feature across all the examples, so that each example now has a random value for the feature and
- /// the original values for all other features. The evalution metric (e.g. micro-accuracy) is then calculated
+ /// the original values for all other features. The evaluation metric (e.g. micro-accuracy) is then calculated
  /// for this modified dataset, and the change in the evaluation metric from the original dataset is computed.
  /// The larger the change in the evaluation metric, the more important the feature is to the model.
  /// PFI works by performing this permutation analysis across all the features of a model, one after another.
  /// </para>
  /// <para>
  /// In this implementation, PFI computes the change in all possible multiclass classification evaluation metrics for each feature, and an
- /// <code>ImmutableArray</code> of <code>MulticlassClassificationMetrics</code> objects is returned. See the sample below for an
+ /// <see cref="ImmutableArray"/> of <see cref="MulticlassClassificationMetrics"/> objects is returned. See the sample below for an
  /// example of working with these results to analyze the feature importance of a model.
  /// </para>
  /// </remarks>
@@ -209,7 +209,7 @@ private static BinaryClassificationMetrics BinaryClassifierDelta(
  /// <param name="catalog">The clustering catalog.</param>
  /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
  /// <param name="data">The evaluation data set.</param>
- /// <param name="labelColumnName">Label column name.</param>
+ /// <param name="labelColumnName">Label column name. The column data must be <see cref="KeyDataViewType"/>.</param>
  /// <param name="useFeatureWeightFilter">Use features weight to pre-filter features.</param>
  /// <param name="numberOfExamplesToUse">Limit the number of examples to evaluate on. <cref langword="null"/> means up to ~2 bln examples from <paramref param="data"/> will be used.</param>
  /// <param name="permutationCount">The number of permutations to perform.</param>
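
The multiclass variant follows the same pattern; here the label column is assumed to be key-typed (e.g. via MapValueToKey) and micro-accuracy is used to order the feature slots. As before, the model and featurized data are assumed to have been prepared elsewhere, and the MicroAccuracy.Mean shape is assumed to match the release in use.

```csharp
using System;
using System.Linq;
using Microsoft.ML;

public static class MulticlassPfiSketch
{
    // 'predictionTransformer' is a trained multiclass model head and 'transformedData' is the
    // featurized evaluation set whose "Label" column is key-typed (both assumed, prepared elsewhere).
    public static void Report<TModel>(
        MLContext mlContext,
        ISingleFeaturePredictionTransformer<TModel> predictionTransformer,
        IDataView transformedData)
        where TModel : class
    {
        var pfi = mlContext.MulticlassClassification.PermutationFeatureImportance(
            predictionTransformer, transformedData, permutationCount: 3);

        // One entry per feature slot; a larger drop in micro-accuracy marks a more important feature.
        foreach (var (metrics, slot) in pfi.Select((m, i) => (m, i))
                     .OrderByDescending(t => Math.Abs(t.m.MicroAccuracy.Mean)))
            Console.WriteLine($"Slot {slot}: change in micro-accuracy (mean) = {metrics.MicroAccuracy.Mean:F4}");
    }
}
```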
@@ -260,7 +260,7 @@ private static MulticlassClassificationMetrics MulticlassClassificationDelta(

  #region Ranking
  /// <summary>
- /// Permutation Feature Importance (PFI) for Ranking
+ /// Permutation Feature Importance (PFI) for Ranking.
  /// </summary>
  /// <remarks>
  /// <para>
@@ -273,14 +273,14 @@ private static MulticlassClassificationMetrics MulticlassClassificationDelta(
  /// <para>
  /// PFI works by taking a labeled dataset, choosing a feature, and permuting the values
  /// for that feature across all the examples, so that each example now has a random value for the feature and
- /// the original values for all other features. The evalution metric (e.g. NDCG) is then calculated
+ /// the original values for all other features. The evaluation metric (e.g. NDCG) is then calculated
  /// for this modified dataset, and the change in the evaluation metric from the original dataset is computed.
  /// The larger the change in the evaluation metric, the more important the feature is to the model.
  /// PFI works by performing this permutation analysis across all the features of a model, one after another.
  /// </para>
  /// <para>
  /// In this implementation, PFI computes the change in all possible ranking evaluation metrics for each feature, and an
- /// <code>ImmutableArray</code> of <code>RankingMetrics</code> objects is returned. See the sample below for an
+ /// <see cref="ImmutableArray"/> of <see cref="RankingMetrics"/> objects is returned. See the sample below for an
  /// example of working with these results to analyze the feature importance of a model.
  /// </para>
  /// </remarks>
@@ -294,7 +294,7 @@ private static MulticlassClassificationMetrics MulticlassClassificationDelta(
  /// <param name="catalog">The clustering catalog.</param>
  /// <param name="predictionTransformer">The model on which to evaluate feature importance.</param>
  /// <param name="data">The evaluation data set.</param>
- /// <param name="labelColumnName">Label column name.</param>
+ /// <param name="labelColumnName">Label column name. The column data must be <see cref="System.Single"/> or <see cref="KeyDataViewType"/>.</param>
  /// <param name="rowGroupColumnName">GroupId column name</param>
  /// <param name="useFeatureWeightFilter">Use features weight to pre-filter features.</param>
  /// <param name="numberOfExamplesToUse">Limit the number of examples to evaluate on. <cref langword="null"/> means up to ~2 bln examples from <paramref param="data"/> will be used.</param>
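
A sketch of the ranking variant, which additionally takes the row group column name. The model and featurized data (Single or key-typed label, key-typed "GroupId", "Features" vector) are assumed to be prepared as in the ranking column sketch earlier on this page; the NDCG property shape is assumed to mirror RankingMetrics and may vary slightly by release.

```csharp
using System;
using System.Linq;
using Microsoft.ML;

public static class RankingPfiSketch
{
    // 'predictionTransformer' is a trained ranking model head; 'transformedData' holds the
    // label, the key-typed "GroupId" column, and the "Features" vector (all assumed).
    public static void Report<TModel>(
        MLContext mlContext,
        ISingleFeaturePredictionTransformer<TModel> predictionTransformer,
        IDataView transformedData)
        where TModel : class
    {
        var pfi = mlContext.Ranking.PermutationFeatureImportance(
            predictionTransformer, transformedData, rowGroupColumnName: "GroupId", permutationCount: 3);

        // One entry per feature slot; a larger drop in NDCG marks a more important feature.
        // NDCG@1 is used here; adjust the index (and property shape) to your ML.NET release.
        foreach (var (metrics, slot) in pfi.Select((m, i) => (m, i))
                     .OrderByDescending(t => Math.Abs(t.m.NormalizedDiscountedCumulativeGains[0].Mean)))
            Console.WriteLine(
                $"Slot {slot}: change in NDCG@1 (mean) = {metrics.NormalizedDiscountedCumulativeGains[0].Mean:F4}");
    }
}
```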
