/// <para>Notice that it's not necessary to specify all entries in the training matrix, so matrix factorization can be used to fill <i>missing values</i>.
/// This behavior is very helpful when building recommender systems.</para>
/// <para>To provide a better understanding on practical uses of matrix factorization, let's consider music recommendation as an example.
/// Assume that user IDs and music IDs are used as row and column indexes, respectively, and the matrix's values are ratings provided by those users. That is,
/// rating <i>r</i> at row <i>u</i> and column <i>v</i> means that user <i>u</i> gives rating <i>r</i> to item <i>v</i>.
/// An incomplete matrix is very common because not all users provide feedback on all products (for example, no one can rate ten million songs).
/// Assume that <i>R</i> is an m-by-n rating matrix and that the two factor matrices are <i>P</i> (an m-by-k matrix) and <i>Q</i> (an n-by-k matrix), where k is the approximation rank.
/// The predicted rating at the u-th row and the v-th column in <i>R</i> would be the inner product of the u-th row of P and the v-th row of Q; that is,
/// <i>R</i> is approximated by the product of <i>P</i> and <i>Q</i>'s transpose. This trainer implements
/// <a href='https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/mf_adaptive_pakdd.pdf'>a stochastic gradient method</a> for finding <i>P</i>
/// and <i>Q</i> via minimizing the distance between <i>R</i> and the product of <i>P</i> and <i>Q</i>'s transpose.</para>
/// <para>For users interested in the mathematical details, please see the references below.</para>
/// <list type='bullet'>
/// <item>
/// <description><a href='https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_journal.pdf'>A Fast Parallel Stochastic Gradient Method for Matrix Factorization in Shared Memory Systems</a></description>
/// </item>
/// <item>
/// <description><a href='https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/mf_adaptive_pakdd.pdf'>A Learning-rate Schedule for Stochastic Gradient Methods to Matrix Factorization</a></description>
/// </item>
/// <item>
/// <description><a href='https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf'>LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems</a></description>
/// </item>
/// </list>
/// <para>Example code can be found by searching for <i>MatrixFactorization</i> in <a href='https://github.com/dotnet/machinelearning'>ML.NET</a>.</para>
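To make the approximation concrete, here is a standalone C# sketch (not part of this change; the factor matrices and values are made up for illustration) that computes a predicted rating as the inner product of the u-th row of P and the v-th row of Q:

using System;

class MatrixFactorizationSketch
{
    // Predicted rating r_uv is the inner product of the u-th row of P (m-by-k)
    // and the v-th row of Q (n-by-k), i.e., R is approximated by P times Q's transpose.
    static float PredictRating(float[][] p, float[][] q, int u, int v)
    {
        float score = 0;
        for (int i = 0; i < p[u].Length; i++)
            score += p[u][i] * q[v][i];
        return score;
    }

    static void Main()
    {
        // Toy factors with approximation rank k = 2.
        float[][] p = { new[] { 1.0f, 0.5f }, new[] { 0.2f, 1.5f } }; // m = 2 users
        float[][] q = { new[] { 0.8f, 0.3f }, new[] { 1.1f, 0.9f } }; // n = 2 items
        Console.WriteLine(PredictRating(p, q, u: 0, v: 1)); // rating of user 0 for item 1
    }
}

The trainer's job is to learn P and Q from the observed entries so that these inner products reproduce the known ratings; the same inner products then fill in the missing ones.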
/// <param name="binaryEstimator">An instance of a binary <see cref="ITrainerEstimator{TTransformer, TPredictor}"/> used as the base trainer.</param>
335
343
/// <param name="calibrator">The calibrator. If a calibrator is not explicitely provided, it will default to <see cref="PlattCalibratorTrainer"/></param>
@@ -350,8 +358,15 @@ public static Ova OneVersusAll(this MulticlassClassificationContext.MulticlassCl
350
358
}
351
359
352
360
/// <summary>
/// Predicts a target using a linear multiclass classification model trained with the <see cref="Pkpd"/>.
/// </summary>
/// <remarks>
/// <para>
/// In the pairwise coupling (PKPD) strategy, a binary classification algorithm is used to train one classifier for each pair of classes.
/// Prediction is then performed by running these binary classifiers and computing a score for each class by counting how many of the binary
/// classifiers predicted it. The prediction is the class with the highest score.
/// </para>
/// </remarks>
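A minimal sketch of the voting rule described above, in plain C# (the PairwiseClassifier delegate and all names here are hypothetical stand-ins, not ML.NET types):

using System;
using System.Collections.Generic;

class PairwiseCouplingSketch
{
    // Hypothetical stand-in for a trained binary classifier that decides
    // between two specific classes; returns the index of the class it picks.
    delegate int PairwiseClassifier(float[] features);

    // Score each class by counting how many pairwise classifiers voted for it;
    // the prediction is the class with the most votes.
    static int Predict(List<PairwiseClassifier> classifiers, float[] features, int numClasses)
    {
        var votes = new int[numClasses];
        foreach (var classify in classifiers)
            votes[classify(features)]++;

        int best = 0;
        for (int c = 1; c < numClasses; c++)
            if (votes[c] > votes[best]) best = c;
        return best;
    }

    static void Main()
    {
        // Three classes -> three pairwise classifiers (0 vs 1, 0 vs 2, 1 vs 2), stubbed out here.
        var classifiers = new List<PairwiseClassifier>
        {
            x => x[0] > 0 ? 0 : 1,
            x => x[0] > 0 ? 0 : 2,
            x => x[1] > 0 ? 1 : 2,
        };
        Console.WriteLine(Predict(classifiers, new[] { -1.0f, 2.0f }, numClasses: 3)); // prints 1
    }
}

Note that k classes require k(k-1)/2 binary classifiers, so this strategy trades training cost for the ability to reuse any binary learner.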
/// <param name="binaryEstimator">An instance of a binary <see cref="ITrainerEstimator{TTransformer, TPredictor}"/> used as the base trainer.</param>
357
372
/// <param name="calibrator">The calibrator. If a calibrator is not explicitely provided, it will default to <see cref="PlattCalibratorTrainer"/></param>
@@ -370,8 +385,19 @@ public static Pkpd PairwiseCoupling(this MulticlassClassificationContext.Multicl
370
385
}
371
386
372
387
/// <summary>
/// Predicts a target using a linear binary classification model trained with the <see cref="LinearSvm"/> trainer.
/// </summary>
/// <remarks>
/// <para>
/// The idea behind support vector machines is to map instances into a high-dimensional space
/// in which the two classes are linearly separable, i.e., there exists a hyperplane such that all the positive examples are on one side of it,
/// and all the negative examples are on the other.
/// </para>
/// <para>
/// After this mapping, quadratic programming is used to find the separating hyperplane that maximizes the
/// margin, i.e., the minimal distance between it and the instances.
/// </para>
/// </remarks>
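As a rough illustration of the resulting model (a standalone sketch with invented weights, not the trainer's implementation), prediction with a learned linear SVM amounts to checking which side of the hyperplane w.x + b = 0 an instance falls on:

using System;

class LinearSvmSketch
{
    // A trained linear SVM is described by a weight vector w and a bias b; the
    // separating hyperplane is the set of points x with w.x + b = 0.
    static float Score(float[] w, float b, float[] x)
    {
        float score = b;
        for (int i = 0; i < w.Length; i++)
            score += w[i] * x[i];
        return score; // sign gives the predicted class; magnitude relates to distance from the hyperplane
    }

    static void Main()
    {
        float[] w = { 0.7f, -1.2f }; // illustrative weights, not learned values
        float score = Score(w, b: 0.1f, x: new[] { 1.0f, 0.4f });
        Console.WriteLine(score > 0 ? "positive class" : "negative class");
    }
}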
/// <param name="args">The <see cref="TensorFlowTransform.Arguments"/> specifying the inputs and the settings of the <see cref="TensorFlowEstimator"/>.</param>