Skip to content

Commit b9a0b07

Browse files
authored
Towards #3204 - documentation for FeatureContributionCalculatingEstimator (#3384)
* Documentation for FeatureContributionEstimator * Address code review comments * Address code review comments
1 parent a48dcef commit b9a0b07

File tree

2 files changed

+67
-48
lines changed

2 files changed

+67
-48
lines changed

src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs

+6-6
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ namespace Microsoft.ML
1414
public static class ExplainabilityCatalog
1515
{
1616
/// <summary>
17-
/// Feature Contribution Calculation computes model-specific contribution scores for each feature.
18-
/// Note that this functionality is not supported by all the models. See <see cref="FeatureContributionCalculatingTransformer"/> for a list of the supported models.
17+
/// Create a <see cref="FeatureContributionCalculatingEstimator"/> that computes model-specific contribution scores for
18+
/// each feature of the input vector.
1919
/// </summary>
20-
/// <param name="catalog">The model explainability operations catalog.</param>
20+
/// <param name="catalog">The transforms catalog.</param>
2121
/// <param name="predictionTransformer">A <see cref="ISingleFeaturePredictionTransformer{TModel}"/> that supports Feature Contribution Calculation,
2222
/// and which will also be used for scoring.</param>
2323
/// <param name="numberOfPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
@@ -40,10 +40,10 @@ public static FeatureContributionCalculatingEstimator CalculateFeatureContributi
4040
=> new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), predictionTransformer.Model, numberOfPositiveContributions, numberOfNegativeContributions, predictionTransformer.FeatureColumnName, normalize);
4141

4242
/// <summary>
43-
/// Feature Contribution Calculation computes model-specific contribution scores for each feature.
44-
/// Note that this functionality is not supported by all the models. See <see cref="FeatureContributionCalculatingTransformer"/> for a list of the supported models.
43+
/// Create a <see cref="FeatureContributionCalculatingEstimator"/> that computes model-specific contribution scores for
44+
/// each feature of the input vector.
4545
/// </summary>
46-
/// <param name="catalog">The model explainability operations catalog.</param>
46+
/// <param name="catalog">The transforms catalog.</param>
4747
/// <param name="predictionTransformer">A <see cref="ISingleFeaturePredictionTransformer{TModel}"/> that supports Feature Contribution Calculation,
4848
/// and which will also be used for scoring.</param>
4949
/// <param name="numberOfPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.

src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs

+61-42
Original file line numberDiff line numberDiff line change
@@ -26,47 +26,8 @@
2626
namespace Microsoft.ML.Transforms
2727
{
2828
/// <summary>
29-
/// The FeatureContributionCalculationTransformer computes model-specific per-feature contributions to the score of each example.
30-
/// See the list of currently supported models below.
29+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="FeatureContributionCalculatingEstimator"/>.
3130
/// </summary>
32-
/// <remarks>
33-
/// <para>
34-
/// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions
35-
/// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculationTransformer computes a model-specific
36-
/// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative
37-
/// (they make the score lower).
38-
/// </para>
39-
/// <para>
40-
/// Feature Contribution Calculation is currently supported for the following models:
41-
/// Regression:
42-
/// OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression,
43-
/// GeneralizedAdditiveModels (GAM), LightGbm, FastTree, FastForest, FastTreeTweedie
44-
/// Binary Classification:
45-
/// AveragedPerceptron, LinearSupportVectorMachines, LogisticRegression, StochasticDualCoordinateAscent (SDCA),
46-
/// StochasticGradientDescent (SGD), SymbolicStochasticGradientDescent, GeneralizedAdditiveModels (GAM),
47-
/// FastForest, FastTree, LightGbm
48-
/// Ranking:
49-
/// FastTree, LightGbm
50-
/// </para>
51-
/// <para>
52-
/// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. Similarly,
53-
/// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
54-
/// the feature value.
55-
/// </para>
56-
/// <para>
57-
/// For tree-based models, the calculation of feature contribution essentially consists in determining which splits in the tree have the most impact
58-
/// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature
59-
/// is equal to the change in score produced by exploring the opposite sub-tree every time a decision node for the given feature is encountered.
60-
/// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1
61-
/// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false
62-
/// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score
63-
/// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with
64-
/// many decision trees.
65-
/// </para>
66-
/// <para>
67-
/// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer.
68-
/// </para>
69-
/// </remarks>
7031
public sealed class FeatureContributionCalculatingTransformer : OneToOneTransformerBase
7132
{
7233
internal sealed class Options : TransformInputBase
@@ -266,9 +227,67 @@ private Delegate GetValueGetter<TSrc>(DataViewRow input, int colSrc)
266227
}
267228

268229
/// <summary>
269-
/// Estimator producing a FeatureContributionCalculatingTransformer which scores the model on an input dataset and
270-
/// computes model-specific contribution scores for each feature.
230+
/// Computes model-specific per-feature contributions to the score of each input vector.
271231
/// </summary>
232+
/// <remarks>
233+
/// <format type="text/markdown"><![CDATA[
234+
///
235+
/// ### Estimator Characteristics
236+
/// | | |
237+
/// | -- | -- |
238+
/// | Does this estimator need to look at the data to train its parameters? | No |
239+
/// | Input column data type | Known-sized vector of <xref:System.Single> |
240+
/// | Output column data type | Known-sized vector of <xref:System.Single> |
241+
///
242+
/// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions
243+
/// it can be useful to inspect which features influenced them most significantly. This transformer computes a model-specific
244+
/// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative
245+
/// (they make the score lower).
246+
///
247+
/// Feature Contribution Calculation is currently supported for the following models:
248+
/// - Regression:
249+
/// - OlsTrainer
250+
/// - SdcaRegressionTrainer
251+
/// - OnlineGradientDescentTrainer
252+
/// - LbfgsPoissonRegressionTrainer
253+
/// - GamRegressionTrainer
254+
/// - LightGbmRegressionTrainer
255+
/// - FastTreeRegressionTrainer
256+
/// - FastForestRegressionTrainer
257+
/// - FastTreeTweedieTrainer
258+
/// - Binary Classification:
259+
/// - AveragedPerceptronTrainer
260+
/// - LinearSvmTrainer
261+
/// - LbfgsLogisticRegressionBinaryTrainer
262+
/// - SdcaNonCalibratedBinaryTrainer
263+
/// - SdcaLogisticRegressionBinaryTrainer
264+
/// - SgdCalibratedTrainer
265+
/// - SgdNonCalibratedTrainer
266+
/// - SymbolicSgdLogisticRegressionBinaryTrainer
267+
/// - GamBinaryTrainer
268+
/// - FastForestBinaryTrainer
269+
/// - FastTreeBinaryTrainer
270+
/// - LightGbmBinaryTrainer
271+
/// - Ranking:
272+
/// - FastTreeRankingTrainer
273+
/// - LightGbmRankingTrainer
274+
///
275+
/// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. Similarly,
276+
/// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
277+
/// the feature value.
278+
///
279+
/// For tree-based models, the calculation of feature contribution essentially consists of determining which splits in the tree have the most impact
280+
/// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature
281+
/// is equal to the change in score produced by exploring the opposite subtree every time a decision node for the given feature is encountered.
282+
/// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1
283+
/// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false
284+
/// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score
285+
/// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with
286+
/// many decision trees.
287+
/// ]]></format>
288+
/// </remarks>
289+
/// <seealso cref="ExplainabilityCatalog.CalculateFeatureContribution(TransformsCatalog, ISingleFeaturePredictionTransformer{ICalculateFeatureContribution}, int, int, bool)"/>
290+
/// <seealso cref="ExplainabilityCatalog.CalculateFeatureContribution{TModelParameters, TCalibrator}(TransformsCatalog, ISingleFeaturePredictionTransformer{Calibrators.CalibratedModelParametersBase{TModelParameters, TCalibrator}}, int, int, bool)"/>
272291
public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<FeatureContributionCalculatingTransformer>
273292
{
274293
private readonly string _featureColumn;

0 commit comments

Comments
 (0)