Skip to content

Commit 548f323

Browse files
authored
XML documentation for Randomized PCA trainer. (#3429)
* XML documentation for Randomized PCA trainer. * PR feedback. * PR feedback.:
1 parent 1b277b5 commit 548f323

File tree

4 files changed

+56
-11
lines changed

4 files changed

+56
-11
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
### Input and Output Columns
2+
The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
3+
4+
| Output Column Name | Column Type | Description|
5+
| -- | -- | -- |
6+
| `Score` | <xref:System.Single> | The non-negative, unbounded score that was calculated by the anomaly detection model.|
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
### Input and Output Columns
2-
The input label column data must be <xref:System.Boolean>. This trainer outputs the following columns:
2+
The input label column data must be <xref:System.Boolean>.
3+
The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
34

45
| Output Column Name | Column Type | Description|
56
| -- | -- | -- |
6-
| `Score` | <xref:System.Single> | The unbounded score that was calculated by the trainer to determine the prediction.|
7-
| `PredictedLabel` | <xref:System.Boolean> | The label predicted by the trainer. `false` maps to negative score and `true` maps to positive score.|
8-
| `Probability` | <xref:System.Single> | The probability of having true as the label. Probability value is in range [0, 1].||
7+
| `Score` | <xref:System.Single> | The unbounded score that was calculated by the model.|
8+
| `PredictedLabel` | <xref:System.Boolean> | The predicted label, based on the sign of the score. A negative score maps to `false` and a positive score maps to `true`.|
9+
| `Probability` | <xref:System.Single> | The probability of having true as the label, calculated by calibrating the score. Probability value is in range [0, 1].|

src/Microsoft.ML.PCA/PCACatalog.cs

+5-4
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,12 @@ internal static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this Tra
4040
=> new PrincipalComponentAnalyzer(CatalogUtils.GetEnvironment(catalog), columns);
4141

4242
/// <summary>
43-
/// Trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
43+
/// Create a <see cref="RandomizedPcaTrainer"/>, which trains an approximate principal component analysis (PCA) model using a randomized singular value decomposition (SVD) algorithm.
4444
/// </summary>
4545
/// <param name="catalog">The anomaly detection catalog trainer object.</param>
46-
/// <param name="featureColumnName">The name of the feature column.</param>
47-
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
46+
/// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
47+
/// <param name="exampleWeightColumnName">The name of the example weight column (optional). To use the weight column, the column data
48+
/// must be of type <see cref="System.Single"/>.</param>
4849
/// <param name="rank">The number of components in the PCA.</param>
4950
/// <param name="oversampling">Oversampling parameter for randomized PCA training.</param>
5051
/// <param name="ensureZeroMean">If enabled, data is centered to be zero mean.</param>
@@ -69,7 +70,7 @@ public static RandomizedPcaTrainer RandomizedPca(this AnomalyDetectionCatalog.An
6970
}
7071

7172
/// <summary>
72-
/// Trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
73+
/// Create a <see cref="RandomizedPcaTrainer"/> with advanced options, which trains an approximate principal component analysis (PCA) model using a randomized singular value decomposition (SVD) algorithm.
7374
/// </summary>
7475
/// <param name="catalog">The anomaly detection catalog trainer object.</param>
7576
/// <param name="options">Advanced options to the algorithm.</param>

src/Microsoft.ML.PCA/PcaTrainer.cs

+40-3
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,45 @@ namespace Microsoft.ML.Trainers
3333
// REVIEW: make RFF transformer an option here.
3434

3535
/// <summary>
36-
/// This trainer trains an approximate PCA using Randomized SVD algorithm
37-
/// Reference: https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf
36+
/// The <see cref="IEstimator{TTransformer}"/> for training an approximate PCA using a randomized SVD algorithm.
3837
/// </summary>
3938
/// <remarks>
40-
/// This PCA can be made into Kernel PCA by using Random Fourier Features transform
39+
/// <format type="text/markdown"><![CDATA[
40+
/// To create this trainer, use [RandomizedPca](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,System.String,System.String,System.Int32,System.Int32,System.Boolean,System.Nullable{System.Int32}))
41+
/// or [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
42+
///
43+
/// [!include[io](~/../docs/samples/docs/api-reference/io-anomaly-detection.md)]
44+
///
45+
/// ### Trainer Characteristics
46+
/// | | |
47+
/// | -- | -- |
48+
/// | Machine learning task | Anomaly Detection |
49+
/// | Is normalization required? | No |
50+
/// | Is caching required? | No |
51+
/// | Required NuGet in addition to Microsoft.ML | None |
52+
///
53+
/// ### Training Algorithm Details
54+
/// This trainer trains an approximate PCA using a randomized method for computing the singular value decomposition (SVD) of
55+
/// the matrix whose rows are the input vectors.
56+
/// The model generated by this trainer contains three parameters:
57+
/// - A projection matrix $U$
58+
/// - The mean vector in the original feature space $m$
59+
/// - The mean vector in the projected feature space $p$
60+
///
61+
/// For an input feature vector $x$, the anomaly score is computed by comparing the $L_2$
62+
/// norm of the original input vector, and the $L_2$ norm of the projected vector:
63+
/// $\sqrt{\left(\|x-m\|_2^2 - \|Ux-p\|_2^2\right)\|x-m\|_2^2}$.
64+
///
65+
/// The method is described [here](https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf).
66+
///
67+
/// Note that the algorithm can be made into Kernel PCA by applying the <xref:Microsoft.ML.Transforms.ApproximatedKernelTransformer>
68+
/// to the data before passing it to the trainer.
69+
/// ]]>
70+
/// </format>
4171
/// </remarks>
72+
/// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, string, string, int, int, bool, int?)"/>
73+
/// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, Options)"/>
74+
/// <seealso cref="Options"/>
4275
public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictionTransformer<PcaModelParameters>, PcaModelParameters>
4376
{
4477
internal const string LoadNameValue = "pcaAnomaly";
@@ -47,6 +80,10 @@ public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictio
4780
internal const string Summary = "This algorithm trains an approximate PCA using Randomized SVD algorithm. "
4881
+ "This PCA can be made into Kernel PCA by using Random Fourier Features transform.";
4982

83+
/// <summary>
84+
/// Options for the <see cref="RandomizedPcaTrainer"/> as used in
85+
/// [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
86+
/// </summary>
5087
public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
5188
{
5289
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of components in the PCA", ShortName = "k", SortOrder = 50)]

0 commit comments

Comments
 (0)