diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs similarity index 64% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs index 4d46f4ef5c..1eadee9777 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs @@ -7,22 +7,22 @@ public class PriorTrainer { public static void Example() { - // Downloading the dataset from github.com/dotnet/machinelearning. - // This will create a sentiment.tsv file in the filesystem. - // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download the sentiment dataset (training and test files). + var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + var trainFile = dataFiles[0]; + var testFile = dataFiles[1]; // A preview of the data. // Sentiment SentimentText // 0 " :Erm, thank you. " // 1 ==You're cool== - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(); - - // Step 1: Load the data as an IDataView. - // First, we define the loader: specify the data columns and where to find them in the text file. + // Step 1: Read the data as an IDataView. + // First, we define the loader: specify the data columns and where to find them in the text file. 
var loader = mlContext.Data.CreateTextLoader( columns: new[] { @@ -31,12 +31,9 @@ public static void Example() }, hasHeader: true ); - - // Load the data - var data = loader.Load(dataFile); - // Split it between training and test data - var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); + // Load the data + var trainData = loader.Load(trainFile); // Step 2: Pipeline // Featurize the text column through the FeaturizeText API. @@ -47,19 +44,27 @@ public static void Example() .Append(mlContext.BinaryClassification.Trainers.Prior(labelColumnName: "Sentiment")); // Step 3: Train the pipeline - var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); + var trainedPipeline = pipeline.Fit(trainData); // Step 4: Evaluate on the test set - var transformedData = trainedPipeline.Transform(trainTestData.TestSet); + var transformedData = trainedPipeline.Transform(loader.Load(testFile)); var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); - - // Step 5: Inspect the output - Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); + SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics); // The Prior trainer outputs the proportion of a label in the dataset as the probability of that label. - // In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset. + // In this case 'Accuracy: 0.50' means that there is a split of around 50%-50% of positive and negative labels in the test dataset. 
// Expected output: - // Accuracy: 0.647058823529412 + + // Accuracy: 0.50 + // AUC: 0.50 + // F1 Score: 0.67 + // Negative Precision: 0.00 + // Negative Recall: 0.00 + // Positive Precision: 0.50 + // Positive Recall: 1.00 + // LogLoss: 1.05 + // LogLossReduction: -4.89 + // Entropy: 1.00 } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/RandomTrainerSample.cs similarity index 61% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/RandomTrainerSample.cs index 9f339dffc0..fe9012eee3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/RandomTrainerSample.cs @@ -7,23 +7,23 @@ public static class RandomTrainer { public static void Example() { - // Downloading the dataset from github.com/dotnet/machinelearning. - // This will create a sentiment.tsv file in the filesystem. - // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(seed: 1); + + // Download the sentiment dataset (training and test files). + var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + var trainFile = dataFiles[0]; + var testFile = dataFiles[1]; // A preview of the data. // Sentiment SentimentText // 0 " :Erm, thank you. " // 1 ==You're cool== - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. 
- var mlContext = new MLContext(seed: 1); - - // Step 1: Load the data as an IDataView. - // First, we define the loader: specify the data columns and where to find them in the text file. - var loader = mlContext.Data.CreateTextLoader( + // Step 1: Read the data as an IDataView. + // First, we define the reader: specify the data columns and where to find them in the text file. + var reader = mlContext.Data.CreateTextLoader( columns: new[] { new TextLoader.Column("Sentiment", DataKind.Single, 0), @@ -31,35 +31,40 @@ public static void Example() }, hasHeader: true ); - - // Load the data - var data = loader.Load(dataFile); - // Split it between training and test data - var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); + // Read the data + var trainData = reader.Load(trainFile); // Step 2: Pipeline // Featurize the text column through the FeaturizeText API. // Then append a binary classifier, setting the "Label" column as the label of the dataset, and // the "Features" column produced by FeaturizeText as the features column. var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") - .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. + .AppendCacheCheckpoint(mlContext) .Append(mlContext.BinaryClassification.Trainers.Random()); // Step 3: Train the pipeline - var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); + var trainedPipeline = pipeline.Fit(trainData); // Step 4: Evaluate on the test set - var transformedData = trainedPipeline.Transform(trainTestData.TestSet); + var transformedData = trainedPipeline.Transform(reader.Load(testFile)); var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); - - // Step 5: Inspect the output - Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); + SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics); // We expect an output probability closet to 0.5 as the Random trainer outputs a random prediction. 
// Regardless of the input features, the trainer will predict either positive or negative label with equal probability. - // Expected output (close to 0.5): - // Accuracy: 0.588235294117647 + // Expected output (close to 0.5): + + // Accuracy: 0.56 + // AUC: 0.57 + // F1 Score: 0.60 + // Negative Precision: 0.57 + // Negative Recall: 0.44 + // Positive Precision: 0.55 + // Positive Recall: 0.67 + // LogLoss: 1.53 + // LogLossReduction: -53.37 + // Entropy: 1.00 } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs index f547cd9712..31b1955a3e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs @@ -20,7 +20,7 @@ public static void Example() var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); // Create data training pipeline var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent( - new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options() + new ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer.Options() { LearningRate = 0.2f, NumberOfIterations = 10, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs index eda2bdda7a..9d2db10930 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs @@ -44,7 +44,7 @@ public static void Example() // Create the 
estimator, here we only need OrdinaryLeastSquares trainer // as data is already processed in a form consumable by the trainer - var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options() + var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OrdinaryLeastSquaresRegressionTrainer.Options() { L2Weight = 0.1f, PerParameterSignificance = false diff --git a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs index ce3bad8b99..44b9dcbf34 100644 --- a/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs +++ b/src/Microsoft.ML.HalLearners/ComputeLRTrainingStdThroughHal.cs @@ -9,7 +9,7 @@ namespace Microsoft.ML.Trainers { - using Mkl = OlsLinearRegressionTrainer.Mkl; + using Mkl = OrdinaryLeastSquaresRegressionTrainer.Mkl; public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd { diff --git a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs index 89d8021df5..c368acb8ef 100644 --- a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs +++ b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs @@ -9,12 +9,12 @@ namespace Microsoft.ML { /// - /// The trainer catalog extensions for the and . + /// The trainer catalog extensions for the and . /// public static class HalLearnersCatalog { /// - /// Predict a target using a linear regression model trained with the . + /// Predict a target using a linear regression model trained with the . /// /// The . /// The name of the label column. 
@@ -27,28 +27,28 @@ public static class HalLearnersCatalog /// ]]> /// /// - public static OlsLinearRegressionTrainer OrdinaryLeastSquares(this RegressionCatalog.RegressionTrainers catalog, + public static OrdinaryLeastSquaresRegressionTrainer OrdinaryLeastSquares(this RegressionCatalog.RegressionTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - var options = new OlsLinearRegressionTrainer.Options + var options = new OrdinaryLeastSquaresRegressionTrainer.Options { LabelColumnName = labelColumnName, FeatureColumnName = featureColumnName, ExampleWeightColumnName = exampleWeightColumnName }; - return new OlsLinearRegressionTrainer(env, options); + return new OrdinaryLeastSquaresRegressionTrainer(env, options); } /// - /// Predict a target using a linear regression model trained with the . + /// Predict a target using a linear regression model trained with the . /// /// The . - /// Algorithm advanced options. See . + /// Algorithm advanced options. See . /// /// /// /// /// - public static OlsLinearRegressionTrainer OrdinaryLeastSquares( + public static OrdinaryLeastSquaresRegressionTrainer OrdinaryLeastSquares( this RegressionCatalog.RegressionTrainers catalog, - OlsLinearRegressionTrainer.Options options) + OrdinaryLeastSquaresRegressionTrainer.Options options) { Contracts.CheckValue(catalog, nameof(catalog)); Contracts.CheckValue(options, nameof(options)); var env = CatalogUtils.GetEnvironment(catalog); - return new OlsLinearRegressionTrainer(env, options); + return new OrdinaryLeastSquaresRegressionTrainer(env, options); } /// - /// Predict a target using a linear binary classification model trained with the . + /// Predict a target using a linear binary classification model trained with the . /// /// The . /// The name of the label column. 
@@ -81,28 +81,28 @@ public static OlsLinearRegressionTrainer OrdinaryLeastSquares( /// ]]> /// /// - public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + public static SymbolicStochasticGradientDescentClassificationTrainer SymbolicStochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, - int numberOfIterations = SymSgdClassificationTrainer.Defaults.NumberOfIterations) + int numberOfIterations = SymbolicStochasticGradientDescentClassificationTrainer.Defaults.NumberOfIterations) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - var options = new SymSgdClassificationTrainer.Options + var options = new SymbolicStochasticGradientDescentClassificationTrainer.Options { LabelColumnName = labelColumnName, FeatureColumnName = featureColumnName, }; - return new SymSgdClassificationTrainer(env, options); + return new SymbolicStochasticGradientDescentClassificationTrainer(env, options); } /// - /// Predict a target using a linear binary classification model trained with the . + /// Predict a target using a linear binary classification model trained with the . /// /// The . - /// Algorithm advanced options. See . + /// Algorithm advanced options. See . 
/// /// /// /// /// - public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent( + public static SymbolicStochasticGradientDescentClassificationTrainer SymbolicStochasticGradientDescent( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, - SymSgdClassificationTrainer.Options options) + SymbolicStochasticGradientDescentClassificationTrainer.Options options) { Contracts.CheckValue(catalog, nameof(catalog)); Contracts.CheckValue(options, nameof(options)); var env = CatalogUtils.GetEnvironment(catalog); - return new SymSgdClassificationTrainer(env, options); + return new SymbolicStochasticGradientDescentClassificationTrainer(env, options); } /// diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 8204ac87d7..be556ea7c8 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -17,22 +17,22 @@ using Microsoft.ML.Model; using Microsoft.ML.Trainers.HalLearners; -[assembly: LoadableClass(OlsLinearRegressionTrainer.Summary, typeof(OlsLinearRegressionTrainer), typeof(OlsLinearRegressionTrainer.Options), +[assembly: LoadableClass(OrdinaryLeastSquaresRegressionTrainer.Summary, typeof(OrdinaryLeastSquaresRegressionTrainer), typeof(OrdinaryLeastSquaresRegressionTrainer.Options), new[] { typeof(SignatureRegressorTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, - OlsLinearRegressionTrainer.UserNameValue, - OlsLinearRegressionTrainer.LoadNameValue, - OlsLinearRegressionTrainer.ShortName)] + OrdinaryLeastSquaresRegressionTrainer.UserNameValue, + OrdinaryLeastSquaresRegressionTrainer.LoadNameValue, + OrdinaryLeastSquaresRegressionTrainer.ShortName)] [assembly: LoadableClass(typeof(OlsLinearRegressionModelParameters), null, typeof(SignatureLoadModel), "OLS Linear Regression Executor", OlsLinearRegressionModelParameters.LoaderSignature)] -[assembly: LoadableClass(typeof(void), 
typeof(OlsLinearRegressionTrainer), null, typeof(SignatureEntryPointModule), OlsLinearRegressionTrainer.LoadNameValue)] +[assembly: LoadableClass(typeof(void), typeof(OrdinaryLeastSquaresRegressionTrainer), null, typeof(SignatureEntryPointModule), OrdinaryLeastSquaresRegressionTrainer.LoadNameValue)] namespace Microsoft.ML.Trainers.HalLearners { /// - public sealed class OlsLinearRegressionTrainer : TrainerEstimatorBase, OlsLinearRegressionModelParameters> + public sealed class OrdinaryLeastSquaresRegressionTrainer : TrainerEstimatorBase, OlsLinearRegressionModelParameters> { /// Advanced options for trainer. public sealed class Options : TrainerInputBaseWithWeight @@ -73,9 +73,9 @@ public sealed class Options : TrainerInputBaseWithWeight public override TrainerInfo Info => _info; /// - /// Initializes a new instance of + /// Initializes a new instance of /// - internal OlsLinearRegressionTrainer(IHostEnvironment env, Options options) + internal OrdinaryLeastSquaresRegressionTrainer(IHostEnvironment env, Options options) : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(options.FeatureColumnName), TrainerUtils.MakeR4ScalarColumn(options.LabelColumnName), TrainerUtils.MakeR4ScalarWeightColumn(options.ExampleWeightColumnName)) { @@ -500,7 +500,7 @@ internal static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment EntryPointUtils.CheckInputArgs(host, options); return TrainerEntryPointsUtils.Train(host, options, - () => new OlsLinearRegressionTrainer(host, options), + () => new OrdinaryLeastSquaresRegressionTrainer(host, options), () => TrainerEntryPointsUtils.FindColumn(host, options.TrainingData.Schema, options.LabelColumnName), () => TrainerEntryPointsUtils.FindColumn(host, options.TrainingData.Schema, options.ExampleWeightColumnName)); } @@ -545,7 +545,7 @@ private static VersionInfo GetVersionInfo() /// are all null. 
A model may not have per parameter statistics because either /// there were not more examples than parameters in the model, or because they /// were explicitly suppressed in training by setting - /// + /// /// to false. /// public bool HasStatistics => StandardErrors != null; diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index 009e6efd1e..a111f9b44e 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -19,20 +19,20 @@ using Microsoft.ML.Trainers.HalLearners; using Microsoft.ML.Transforms; -[assembly: LoadableClass(typeof(SymSgdClassificationTrainer), typeof(SymSgdClassificationTrainer.Options), +[assembly: LoadableClass(typeof(SymbolicStochasticGradientDescentClassificationTrainer), typeof(SymbolicStochasticGradientDescentClassificationTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, - SymSgdClassificationTrainer.UserNameValue, - SymSgdClassificationTrainer.LoadNameValue, - SymSgdClassificationTrainer.ShortName)] + SymbolicStochasticGradientDescentClassificationTrainer.UserNameValue, + SymbolicStochasticGradientDescentClassificationTrainer.LoadNameValue, + SymbolicStochasticGradientDescentClassificationTrainer.ShortName)] -[assembly: LoadableClass(typeof(void), typeof(SymSgdClassificationTrainer), null, typeof(SignatureEntryPointModule), SymSgdClassificationTrainer.LoadNameValue)] +[assembly: LoadableClass(typeof(void), typeof(SymbolicStochasticGradientDescentClassificationTrainer), null, typeof(SignatureEntryPointModule), SymbolicStochasticGradientDescentClassificationTrainer.LoadNameValue)] namespace Microsoft.ML.Trainers.HalLearners { using TPredictor = CalibratedModelParametersBase; /// - public sealed class SymSgdClassificationTrainer : TrainerEstimatorBase, TPredictor> + public sealed class 
SymbolicStochasticGradientDescentClassificationTrainer : TrainerEstimatorBase, TPredictor> { internal const string LoadNameValue = "SymbolicSGD"; internal const string UserNameValue = "Symbolic SGD (binary)"; @@ -195,9 +195,9 @@ private protected override TPredictor TrainModelCore(TrainContext context) private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification; /// - /// Initializes a new instance of + /// Initializes a new instance of /// - internal SymSgdClassificationTrainer(IHostEnvironment env, Options options) + internal SymbolicStochasticGradientDescentClassificationTrainer(IHostEnvironment env, Options options) : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(options.FeatureColumnName), TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName)) { @@ -223,7 +223,7 @@ private protected override BinaryPredictionTransformer MakeTransform => new BinaryPredictionTransformer(Host, model, trainSchema, FeatureColumn.Name); /// - /// Continues the training of a using an already trained + /// Continues the training of using an already trained /// a . 
/// public BinaryPredictionTransformer Fit(IDataView trainData, LinearModelParameters modelParameters) @@ -241,8 +241,8 @@ private protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape [TlcModule.EntryPoint(Name = "Trainers.SymSgdBinaryClassifier", Desc = "Train a symbolic SGD.", - UserName = SymSgdClassificationTrainer.UserNameValue, - ShortName = SymSgdClassificationTrainer.ShortName)] + UserName = SymbolicStochasticGradientDescentClassificationTrainer.UserNameValue, + ShortName = SymbolicStochasticGradientDescentClassificationTrainer.ShortName)] internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnvironment env, Options options) { Contracts.CheckValue(env, nameof(env)); @@ -251,7 +251,7 @@ internal static CommonOutputs.BinaryClassificationOutput TrainSymSgd(IHostEnviro EntryPointUtils.CheckInputArgs(host, options); return TrainerEntryPointsUtils.Train(host, options, - () => new SymSgdClassificationTrainer(host, options), + () => new SymbolicStochasticGradientDescentClassificationTrainer(host, options), () => TrainerEntryPointsUtils.FindColumn(host, options.TrainingData.Schema, options.LabelColumnName)); } @@ -324,7 +324,7 @@ public void Free() // giving an array, we are at _storage[_storageIndex][_indexInCurArray]. private int _indexInCurArray; // This is used to access AccelMemBudget, AccelChunkSize and UsedMemory - private readonly SymSgdClassificationTrainer _trainer; + private readonly SymbolicStochasticGradientDescentClassificationTrainer _trainer; private readonly IChannel _ch; @@ -336,7 +336,7 @@ public void Free() /// /// /// - public ArrayManager(SymSgdClassificationTrainer trainer, IChannel ch) + public ArrayManager(SymbolicStochasticGradientDescentClassificationTrainer trainer, IChannel ch) { _storage = new List(); // Setting the default value to 2^17. 
@@ -500,7 +500,7 @@ private sealed class InputDataManager : IDisposable // This is the index to go over the instances in instanceProperties private int _instanceIndex; // This is used to access AccelMemBudget, AccelChunkSize and UsedMemory - private readonly SymSgdClassificationTrainer _trainer; + private readonly SymbolicStochasticGradientDescentClassificationTrainer _trainer; private readonly IChannel _ch; // Whether memorySize was big enough to load the entire instances into the buffer @@ -511,7 +511,7 @@ private sealed class InputDataManager : IDisposable // Tells if we have gone through the dataset entirely. public bool FinishedTheLoad => !_cursorMoveNext; - public InputDataManager(SymSgdClassificationTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch) + public InputDataManager(SymbolicStochasticGradientDescentClassificationTrainer trainer, FloatLabelCursor.Factory cursorFactory, IChannel ch) { _instIndices = new ArrayManager(trainer, ch); _instValues = new ArrayManager(trainer, ch); diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs index b3ad5c56f7..d8fa7f1546 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs @@ -52,12 +52,16 @@ private static VersionInfo GetVersionInfo() private const uint VersionNoMinCount = 0x00010002; private readonly IHost _host; + /// The number of rows. public readonly int NumberOfRows; + /// The number of columns. public readonly int NumberOfColumns; + /// The rank of the factor matrices. 
public readonly int ApproximationRank; + /// /// Left approximation matrix /// diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index d4f09b9eae..90c977935f 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -100,6 +100,7 @@ public enum LossFunctionType /// See Equation (1). /// SquareLossRegression = 0, + /// /// Used in implicit-feedback recommendation problem. /// @@ -115,7 +116,7 @@ public enum LossFunctionType public sealed class Options { /// - /// The name of variable (i.e., Column in a type system) used be as matrix's column index. + /// The name of variable (i.e., Column in a type system) used as matrix's column index. /// public string MatrixColumnIndexColumnName; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs b/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs index a201d6d655..73cecec274 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs @@ -93,6 +93,7 @@ private RandomModelParameters Train(TrainContext context) } RandomModelParameters ITrainer.Train(TrainContext context) => Train(context); + IPredictor ITrainer.Train(TrainContext context) => Train(context); /// @@ -264,8 +265,8 @@ internal sealed class Options { } - private readonly String _labelColumnName; - private readonly String _weightColumnName; + private readonly string _labelColumnName; + private readonly string _weightColumnName; private readonly IHost _host; /// Return the type of prediction task. 
@@ -357,6 +358,7 @@ private PriorModelParameters Train(TrainContext context) } IPredictor ITrainer.Train(TrainContext context) => Train(context); + PriorModelParameters ITrainer.Train(TrainContext context) => Train(context); private static SchemaShape.Column MakeFeatureColumn(string featureColumn) diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index cf3150625f..e36f423834 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -701,7 +701,7 @@ public static LinearSvmTrainer LinearSupportVectorMachines(this BinaryClassifica /// /// /// /// public static RandomTrainer Random(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog) @@ -723,7 +723,7 @@ public static RandomTrainer Random(this BinaryClassificationCatalog.BinaryClassi /// /// /// /// public static PriorTrainer Prior(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index fc162e8dbe..09c043794a 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -62,14 +62,14 @@ Trainers.LogisticRegressionBinaryClassifier Logistic Regression is a method in s Trainers.LogisticRegressionClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Trainers.LogisticRegression TrainMultiClass Microsoft.ML.Trainers.MulticlassLogisticRegression+Options Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. 
Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.OnlineGradientDescentRegressor Train a Online gradient descent perceptron. Microsoft.ML.Trainers.OnlineGradientDescentTrainer TrainRegression Microsoft.ML.Trainers.OnlineGradientDescentTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput -Trainers.OrdinaryLeastSquaresRegressor Train an OLS regression model. Microsoft.ML.Trainers.HalLearners.OlsLinearRegressionTrainer TrainRegression Microsoft.ML.Trainers.HalLearners.OlsLinearRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput +Trainers.OrdinaryLeastSquaresRegressor Train an OLS regression model. Microsoft.ML.Trainers.HalLearners.OrdinaryLeastSquaresRegressionTrainer TrainRegression Microsoft.ML.Trainers.HalLearners.OrdinaryLeastSquaresRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.PcaAnomalyDetector Train an PCA Anomaly model. Microsoft.ML.Trainers.RandomizedPcaTrainer TrainPcaAnomaly Microsoft.ML.Trainers.RandomizedPcaTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+AnomalyDetectionOutput Trainers.PoissonRegressor Train an Poisson regression model. Microsoft.ML.Trainers.PoissonRegression TrainRegression Microsoft.ML.Trainers.PoissonRegression+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.StochasticDualCoordinateAscentBinaryClassifier Train an SDCA binary model. Microsoft.ML.Trainers.Sdca TrainBinary Microsoft.ML.Trainers.LegacySdcaBinaryTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.StochasticDualCoordinateAscentClassifier The SDCA linear multi-class classification trainer. 
Microsoft.ML.Trainers.Sdca TrainMultiClass Microsoft.ML.Trainers.SdcaMultiClassTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.StochasticDualCoordinateAscentRegressor The SDCA linear regression trainer. Microsoft.ML.Trainers.Sdca TrainRegression Microsoft.ML.Trainers.SdcaRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.StochasticGradientDescentBinaryClassifier Train an Hogwild SGD binary model. Microsoft.ML.Trainers.LegacySgdBinaryTrainer TrainBinary Microsoft.ML.Trainers.LegacySgdBinaryTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput -Trainers.SymSgdBinaryClassifier Train a symbolic SGD. Microsoft.ML.Trainers.HalLearners.SymSgdClassificationTrainer TrainSymSgd Microsoft.ML.Trainers.HalLearners.SymSgdClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput +Trainers.SymSgdBinaryClassifier Train a symbolic SGD. Microsoft.ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer TrainSymSgd Microsoft.ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Transforms.BootstrapSample GetSample Microsoft.ML.Transforms.BootstrapSamplingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.BinaryPredictionScoreColumnsRenamer For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. Microsoft.ML.EntryPoints.ScoreModel RenameBinaryPredictionScoreColumns Microsoft.ML.EntryPoints.ScoreModel+RenameBinaryPredictionScoreColumnsInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.BinNormalizer The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins. 
Microsoft.ML.Data.Normalize Bin Microsoft.ML.Transforms.NormalizeTransform+BinArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 87e788c31f..746e0fd16d 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -327,7 +327,7 @@ public void EntryPointCatalogCheckDuplicateParams() Env.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryModelParameters).Assembly); Env.ComponentCatalog.RegisterAssembly(typeof(TensorFlowTransformer).Assembly); Env.ComponentCatalog.RegisterAssembly(typeof(ImageLoadingTransformer).Assembly); - Env.ComponentCatalog.RegisterAssembly(typeof(SymSgdClassificationTrainer).Assembly); + Env.ComponentCatalog.RegisterAssembly(typeof(SymbolicStochasticGradientDescentClassificationTrainer).Assembly); Env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly); Env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly); Env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly); diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 731b013189..8e6156634f 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -41,7 +41,7 @@ protected override void InitializeEnvironment(IHostEnvironment environment) base.InitializeEnvironment(environment); environment.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryModelParameters).Assembly); - environment.ComponentCatalog.RegisterAssembly(typeof(SymSgdClassificationTrainer).Assembly); + environment.ComponentCatalog.RegisterAssembly(typeof(SymbolicStochasticGradientDescentClassificationTrainer).Assembly); } /// diff --git 
a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs index a29c38befe..a6366365a0 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs @@ -65,7 +65,7 @@ public void SimpleTrainAndPredictSymSGD() // Pipeline. var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText") .AppendCacheCheckpoint(ml) - .Append(ml.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(new SymSgdClassificationTrainer.Options + .Append(ml.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(new SymbolicStochasticGradientDescentClassificationTrainer.Options { NumberOfThreads = 1 })); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs index 43e2629f1d..edcb2b8124 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs @@ -14,7 +14,7 @@ public partial class TrainerEstimators public void TestEstimatorOlsLinearRegression() { var dataView = GetRegressionPipeline(); - var trainer = ML.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options()); + var trainer = ML.Regression.Trainers.OrdinaryLeastSquares(new OrdinaryLeastSquaresRegressionTrainer.Options()); TestEstimatorCore(trainer, dataView); var model = trainer.Fit(dataView); @@ -22,7 +22,7 @@ public void TestEstimatorOlsLinearRegression() Assert.NotEmpty(model.Model.StandardErrors); Assert.NotEmpty(model.Model.PValues); Assert.NotEmpty(model.Model.TValues); - trainer = ML.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options() { PerParameterSignificance = false }); + trainer = ML.Regression.Trainers.OrdinaryLeastSquares(new 
OrdinaryLeastSquaresRegressionTrainer.Options() { PerParameterSignificance = false }); model = trainer.Fit(dataView); Assert.False(model.Model.HasStatistics); Done(); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs index 8e4f6bf170..054203725c 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs @@ -15,7 +15,7 @@ public partial class TrainerEstimators public void TestEstimatorSymSgdClassificationTrainer() { (var pipe, var dataView) = GetBinaryClassificationPipeline(); - var trainer = new SymSgdClassificationTrainer(Env, new SymSgdClassificationTrainer.Options()); + var trainer = new SymbolicStochasticGradientDescentClassificationTrainer(Env, new SymbolicStochasticGradientDescentClassificationTrainer.Options()); var pipeWithTrainer = pipe.Append(trainer); TestEstimatorCore(pipeWithTrainer, dataView); @@ -34,11 +34,11 @@ public void TestEstimatorSymSgdInitPredictor() var initPredictor = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscent().Fit(transformedData); var data = initPredictor.Transform(transformedData); - var withInitPredictor = new SymSgdClassificationTrainer(Env, new SymSgdClassificationTrainer.Options()).Fit(transformedData, + var withInitPredictor = new SymbolicStochasticGradientDescentClassificationTrainer(Env, new SymbolicStochasticGradientDescentClassificationTrainer.Options()).Fit(transformedData, modelParameters: initPredictor.Model.SubModel); var outInitData = withInitPredictor.Transform(transformedData); - var notInitPredictor = new SymSgdClassificationTrainer(Env, new SymSgdClassificationTrainer.Options()).Fit(transformedData); + var notInitPredictor = new SymbolicStochasticGradientDescentClassificationTrainer(Env, new SymbolicStochasticGradientDescentClassificationTrainer.Options()).Fit(transformedData); var 
outNoInitData = notInitPredictor.Transform(transformedData); int numExamples = 10;