Skip to content

Commit 029b34e

Browse files
committed
PR feedback.
1 parent 4dd116c commit 029b34e

File tree

4 files changed

+36
-10
lines changed

4 files changed

+36
-10
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
### Input and Output Columns
2+
The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
3+
4+
| Output Column Name | Column Type | Description|
5+
| -- | -- | -- |
6+
| `Score` | <xref:System.Single> | The non-negative, unbounded score that was calculated by the anomaly detection model.|
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
### Input and Output Columns
2-
The input label column data must be <xref:System.Boolean>. This trainer outputs the following columns:
2+
The input label column data must be <xref:System.Boolean>.
3+
The input features column data must be a known-sized vector of <xref:System.Single>. This trainer outputs the following columns:
34

45
| Output Column Name | Column Type | Description|
56
| -- | -- | -- |
6-
| `Score` | <xref:System.Single> | The unbounded score that was calculated by the trainer to determine the prediction.|
7-
| `PredictedLabel` | <xref:System.Boolean> | The label predicted by the trainer. `false` maps to negative score and `true` maps to positive score.|
8-
| `Probability` | <xref:System.Single> | The probability of having true as the label. Probability value is in range [0, 1].||
7+
| `Score` | <xref:System.Single> | The unbounded score that was calculated by the model.|
8+
| `PredictedLabel` | <xref:System.Boolean> | The predicted label, based on the sign of the score. A negative score maps to `false` and a positive score maps to `true`.|
9+
| `Probability` | <xref:System.Single> | The probability of having true as the label, calculated by calibrating the score. Probability value is in range [0, 1].|

src/Microsoft.ML.PCA/PCACatalog.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,12 @@ internal static PrincipalComponentAnalyzer ProjectToPrincipalComponents(this Tra
4040
=> new PrincipalComponentAnalyzer(CatalogUtils.GetEnvironment(catalog), columns);
4141

4242
/// <summary>
43-
/// Create <see cref="RandomizedPcaTrainer"/>, which trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
43+
/// Create <see cref="RandomizedPcaTrainer"/>, which trains an approximate principal component analysis (PCA) model using the randomized singular value decomposition (SVD) algorithm.
4444
/// </summary>
4545
/// <param name="catalog">The anomaly detection catalog trainer object.</param>
4646
/// <param name="featureColumnName">The name of the feature column. The column data must be a known-sized vector of <see cref="System.Single"/>.</param>
47-
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
47+
/// <param name="exampleWeightColumnName">The name of the example weight column (optional). To use the weight column, the column data
48+
/// must be of type <see cref="System.Single"/>.</param>
4849
/// <param name="rank">The number of components in the PCA.</param>
4950
/// <param name="oversampling">Oversampling parameter for randomized PCA training.</param>
5051
/// <param name="ensureZeroMean">If enabled, data is centered to be zero mean.</param>
@@ -69,7 +70,7 @@ public static RandomizedPcaTrainer RandomizedPca(this AnomalyDetectionCatalog.An
6970
}
7071

7172
/// <summary>
72-
/// Create <see cref="RandomizedPcaTrainer"/> using advanced options, which trains an approximate principal component analysis (PCA) model using randomized SVD algorithm.
73+
/// Create <see cref="RandomizedPcaTrainer"/> with advanced options, which trains an approximate principal component analysis (PCA) model using the randomized singular value decomposition (SVD) algorithm.
7374
/// </summary>
7475
/// <param name="catalog">The anomaly detection catalog trainer object.</param>
7576
/// <param name="options">Advanced options to the algorithm.</param>

src/Microsoft.ML.PCA/PcaTrainer.cs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ namespace Microsoft.ML.Trainers
4040
/// To create this trainer, use [RandomizedPca](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,System.String,System.String,System.Int32,System.Int32,System.Boolean,System.Nullable{System.Int32}))
4141
/// or [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
4242
///
43-
/// [!include[io](~/../docs/samples/docs/api-reference/io-columns-regression.md)]
43+
/// [!include[io](~/../docs/samples/docs/api-reference/io-anomaly-detection.md)]
4444
///
4545
/// ### Trainer Characteristics
4646
/// | | |
@@ -51,12 +51,26 @@ namespace Microsoft.ML.Trainers
5151
/// | Required NuGet in addition to Microsoft.ML | None |
5252
///
5353
/// ### Training Algorithm Details
54-
/// This PCA can be made into Kernel PCA by using Random Fourier Features transform. [Reference](https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf)
54+
/// This trainer trains an approximate PCA using a randomized method for computing the singular value decomposition (SVD) of
55+
/// the matrix whose rows are the input vectors.
56+
/// The model generated by this trainer contains three parameters:
57+
/// - A projection matrix $U$
58+
/// - The mean vector in the original feature space $m$
59+
/// - The mean vector in the projected feature space $p$
60+
///
61+
/// For an input feature vector $x$, the anomaly score is computed by comparing the $L_2$
62+
/// norm of the centered input vector $x-m$ with the $L_2$ norm of the projected vector $Ux-p$:
63+
/// $\sqrt{\left(\|x-m\|_2^2 - \|Ux-p\|_2^2\right)\|x-m\|_2^2}$.
64+
///
65+
/// The method is described [here](https://web.stanford.edu/group/mmds/slides2010/Martinsson.pdf).
66+
///
67+
/// Note that the algorithm can be made into Kernel PCA by applying the <xref:Microsoft.ML.Transforms.ApproximatedKernelTransformer>
68+
/// to the data before passing it to the trainer.
5569
/// ]]>
5670
/// </format>
5771
/// </remarks>
5872
/// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, string, string, int, int, bool, int?)"/>
59-
/// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, RandomizedPcaTrainer.Options)"/>
73+
/// <seealso cref="PcaCatalog.RandomizedPca(AnomalyDetectionCatalog.AnomalyDetectionTrainers, Options)"/>
6074
/// <seealso cref="Options"/>
6175
public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictionTransformer<PcaModelParameters>, PcaModelParameters>
6276
{
@@ -66,6 +80,10 @@ public sealed class RandomizedPcaTrainer : TrainerEstimatorBase<AnomalyPredictio
6680
internal const string Summary = "This algorithm trains an approximate PCA using Randomized SVD algorithm. "
6781
+ "This PCA can be made into Kernel PCA by using Random Fourier Features transform.";
6882

83+
/// <summary>
84+
/// Options for the <see cref="RandomizedPcaTrainer"/> as used in
85+
/// [RandomizedPca(Options)](xref:Microsoft.ML.PcaCatalog.RandomizedPca(Microsoft.ML.AnomalyDetectionCatalog.AnomalyDetectionTrainers,Microsoft.ML.Trainers.RandomizedPcaTrainer.Options)).
86+
/// </summary>
6987
public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
7088
{
7189
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of components in the PCA", ShortName = "k", SortOrder = 50)]

0 commit comments

Comments
 (0)