PR feedback.:

codemzs · codemzs · commit 029b34e0c5db · 2019-04-21T10:09:00.000-07:00
diff --git a/docs/api-reference/io-columns-anomaly-detection.md b/docs/api-reference/io-columns-anomaly-detection.md
@@ -0,0 +1,6 @@
+### Input and Output Columns
+The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
+
+| Output Column Name | Column Type | Description|
+| -- | -- | -- |
+| `Score` | <xref:System.Single> | The non-negative, unbounded score that was calculated by the anomaly detection model.|
diff --git a/docs/api-reference/io-columns-binary-classification.md b/docs/api-reference/io-columns-binary-classification.md
@@ -1,8 +1,9 @@
 ### Input and Output Columns
-The input label column data must be <xref:System.Boolean>. This trainer outputs the following columns:
+The input label column data must be <xref:System.Boolean>.
+The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
 
 | Output Column Name | Column Type | Description|
 | -- | -- | -- |
-| `Score` | <xref:System.Single> | The unbounded score that was calculated by the trainer to determine the prediction.|
-| `PredictedLabel` | <xref:System.Boolean> | The label predicted by the trainer. `false` maps to negative score and `true` maps to positive score.|
-| `Probability` | <xref:System.Single> | The probability of having true as the label. Probability value is in range [0, 1].||
+| `Score` | <xref:System.Single> | The unbounded score that was calculated by the model.|
+| `PredictedLabel` | <xref:System.Boolean> | The predicted label, based on the sign of the score. A negative score maps to `false` and a positive score maps to `true`.|
+| `Probability` | <xref:System.Single> | The probability of having true as the label, calculated by calibrating the score. Probability value is in range [0, 1].|
diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs
@@ -40,11 +40,12 @@ internal static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this Tra
             => new PrincipalComponentAnalyzer(CatalogUtils.GetEnvironment(catalog), columns);
 
         /// <summary>
-        /// Create <see cref="RandomizedPcaTrainer"/>, which trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
+        /// Create <see cref="RandomizedPcaTrainer"/>, which trains an approximate principal component analysis (PCA) model using the randomized singular value decomposition (SVD) algorithm.
         /// </summary>
         /// <param name="catalog">The anomaly detection catalog trainer object.</param>
         /// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
-        /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
+        /// <param name="exampleWeightColumnName">The name of the example weight column (optional). To use the weight column, the column data
+        /// must be of type <see cref="System.Single"/>.</param>
         /// <param name="rank">The number of components in the PCA.</param>
         /// <param name="oversampling">Oversampling parameter for randomized PCA training.</param>
         /// <param name="ensureZeroMean">If enabled, data is centered to be zero mean.</param>
@@ -69,7 +70,7 @@ public static RandomizedPcaTrainer RandomizedPca(this AnomalyDetectionCatalog.An
         }
 
         /// <summary>
-        /// Create <see cref="RandomizedPcaTrainer"/> using advanced options, which trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
+        /// Create <see cref="RandomizedPcaTrainer"/> with advanced options, which trains an approximate principal component analysis (PCA) model using the randomized singular value decomposition (SVD) algorithm.
         /// </summary>
         /// <param name="catalog">The anomaly detection catalog trainer object.</param>
         /// <param name="options">Advanced options to the algorithm.</param>
diff --git a/src/Microsoft.ML.PCA/PcaTrainer.cs b/src/Microsoft.ML.PCA/PcaTrainer.cs
@@ -40,7 +40,7 @@ namespace Microsoft.ML.Trainers
     /// To create this trainer, use [RandomizedPca](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,System.String,System.String,System.Int32,System.Int32,System.Boolean,System.Nullable{System.Int32}))
     /// or [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
     ///
-    /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)]
+    /// [!include[io](~/../docs/samples/docs/api-reference/io-columns-anomaly-detection.md)]
     ///
     /// ### Trainer Characteristics
     /// |  |  |
@@ -51,12 +51,26 @@ namespace Microsoft.ML.Trainers
     /// | Required NuGet in addition to Microsoft.ML | None |
     ///
     /// ### Training Algorithm Details
-    /// This PCA can be made into Kernel PCA by using Random Fourier Features transform. [Reference](https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf)
+    /// This trainer trains an approximate PCA using a randomized method for computing the singular value decomposition (SVD) of
+    /// the matrix whose rows are the input vectors.
+    /// The model generated by this trainer contains three parameters:
+    /// - A projection matrix $U$
+    /// - The mean vector in the original feature space $m$
+    /// - The mean vector in the projected feature space $p$
+    ///
+    /// For an input feature vector $x$, the anomaly score is computed by comparing the $L_2$
+    /// norm of the original input vector, and the $L_2$ norm of the projected vector:
+    /// $\sqrt{\left(\|x-m\|_2^2 - \|Ux-p\|_2^2\right)\|x-m\|_2^2}$.
+    ///
+    /// The method is described [here](https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf).
+    ///
+    /// Note that the algorithm can be made into Kernel PCA by applying the <xref:Microsoft.ML.Transforms.ApproximatedKernelTransformer>
+    /// to the data before passing it to the trainer.
     /// ]]>
     /// </format>
     /// </remarks>
     /// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, string, string, int, int, bool, int?)"/>
-    /// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, RandomizedPcaTrainer.Options)"/>
+    /// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, Options)"/>
     /// <seealso cref="Options"/>
     public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictionTransformer<PcaModelParameters>, PcaModelParameters>
     {
@@ -66,6 +80,10 @@ public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictio
         internal const string Summary = "This algorithm trains an approximate PCA using Randomized SVD algorithm. "
             + "This PCA can be made into Kernel PCA by using Random Fourier Features transform.";
 
+        /// <summary>
+        /// Options for the <see cref="RandomizedPcaTrainer"/> as used in
+        /// [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
+        /// </summary>
         public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
         {
             [Argument(ArgumentType.AtMostOnce, HelpText = "The number of components in the PCA", ShortName = "k", SortOrder = 50)]