From bf9d946227c2180b63661a417c15c6cfc909bd14 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Thu, 7 Feb 2019 16:00:25 -0800 Subject: [PATCH 01/14] Updated docs for AveragedPerceptron --- .../Standard/Online/AveragedLinear.cs | 35 +++++++++++++++++ .../Standard/Online/AveragedPerceptron.cs | 19 ++++++--- .../Standard/Online/OnlineLinear.cs | 16 ++++++++ .../Standard/Online/doc.xml | 39 ------------------- .../StandardLearnersCatalog.cs | 36 +++++++++++++---- 5 files changed, 93 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index bf7a88d27d..0a73a0ac3b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -18,36 +18,71 @@ namespace Microsoft.ML.Trainers.Online { public abstract class AveragedLinearArguments : OnlineLinearArguments { + /// + /// Learning rate + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)] [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")] [TlcModule.SweepableDiscreteParam("LearningRate", new object[] { 0.01, 0.1, 0.5, 1.0 })] public float LearningRate = AveragedDefaultArgs.LearningRate; + /// + /// to decrease the learning rate as iterations progress; otherwise, . + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)] [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")] [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })] public bool DecreaseLearningRate = AveragedDefaultArgs.DecreaseLearningRate; + /// + /// Number of examples after which weights will be reset to the current average. + /// Default is , which disables this feature. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")] public long? ResetWeightsAfterXExamples = null; + /// + /// to update averaged weights only when loss is nonzero. + /// to update averaged weights on every example. + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")] public bool DoLazyUpdates = true; + /// + /// L2 weight for regularization. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)] [TGUI(Label = "L2 Regularization Weight")] [TlcModule.SweepableFloatParam("L2RegularizerWeight", 0.0f, 0.4f)] public float L2RegularizerWeight = AveragedDefaultArgs.L2RegularizerWeight; + /// + /// Extra weight given to more recent updates. + /// Default is 0, i.e. no extra gain. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")] public float RecencyGain = 0; + /// + /// means is multiplicative. + /// means is additive. + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")] public bool RecencyGainMulti = false; + /// + /// to do averaging; otherwise, . + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")] public bool Averaged = true; + /// + /// The inexactness tolerance for averaging. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")] public float AveragedTolerance = (float)1e-2; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index b3659974fa..2e82f8af5e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -25,12 +25,10 @@ namespace Microsoft.ML.Trainers.Online { - // This is an averaged perceptron classifier. - // Configurable subcomponents: - // - Loss function. By default, hinge loss (aka max-margin avgd perceptron) - // - Feature normalization. By default, rescaling between min and max values for every feature - // - Prediction calibration to produce probabilities. Off by default, if on, uses exponential (aka Platt) calibration. - /// + /// + /// This is averaged perceptron trainer. + /// For usage details, please see + /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { public const string LoadNameValue = "AveragedPerceptron"; @@ -42,12 +40,21 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer + /// The custom loss. Default is hinge loss. + /// [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)] public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments(); + /// + /// The calibrator for producing probabilities. Default is exponential (aka Platt) calibration. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] public ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory(); + /// + /// The maximum number of examples to use when training the calibrator. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of examples to use when training the calibrator", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] public int MaxCalibrationExamples = 1000000; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index 2128c8e008..4e1cb6e341 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -20,24 +20,40 @@ namespace Microsoft.ML.Trainers.Online public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel { + /// + /// Number of training iterations through the data. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumIterations = OnlineDefaultArgs.NumIterations; + /// + /// Initial weights and bias, comma-separated. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Initial Weights and bias, comma-separated", ShortName = "initweights")] [TGUI(NoSweep = true)] public string InitialWeights; + /// + /// Initial weights scale. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)] [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")] [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)] public float InitWtsDiameter = 0; + /// + /// to shuffle data for each training iteration; otherwise, . + /// Default is . 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")] [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle = true; + /// + /// Size of cache when trained in Scope. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")] public int StreamingCacheSize = 1000000; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml index 8e8f5dc2ba..292aeface5 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml @@ -25,44 +25,5 @@ - - - - Averaged Perceptron Binary Classifier. - - - Perceptron is a classification algorithm that makes its predictions based on a linear function. - I.e., for an instance with feature values f0, f1,..., f_D-1, , the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm. - - Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. - The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. - If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, - the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, - multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, - and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). 
- - - In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored, - together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not). - The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors. - - For more information see: - Wikipedia entry for Perceptron - Large Margin Classification Using the Perceptron Algorithm - - - - - - new AveragedPerceptronBinaryClassifier - { - NumIterations = 10, - L2RegularizerWeight = 0.01f, - LossFunction = new ExpLossClassificationLossFunction() - } - - - - diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 6442e4eb01..dd0ffc76c0 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -191,16 +191,37 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla } /// - /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer. + /// Predict a target using a linear binary classification model trained with averaged perceptron trainer. /// + /// + /// Perceptron is a classification algorithm that makes its predictions based on a linear function. + /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. + /// + /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. + /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. 
+ /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, + /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, + /// multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, + /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). + /// + /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored, + /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not). + /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) of the different weight vectors. + /// + /// For more information see Wikipedia entry for Perceptron + /// or Large Margin Classification Using the Perceptron Algorithm + /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The custom loss. + /// The custom loss. If , hinge loss will be used resulting in max-margin averaged perceptron. /// The optional example weights. - /// The learning Rate. - /// Decrease learning rate as iterations progress. - /// L2 regularization weight. + /// Learning rate. + /// + /// to decrease the learning rate as iterations progress; otherwise, . + /// Default is . + /// + /// L2 weight for regularization. /// Number of training iterations through the data.
public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, @@ -220,10 +241,11 @@ public static AveragedPerceptronTrainer AveragedPerceptron( } /// - /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer. + /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options. + /// For trainer details, please see the remarks for /// /// The binary classification catalog trainer object. - /// Advanced arguments to the algorithm. + /// Advanced trainer options. public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { From 4222e85005663c696b96315ee0d2d0e8d84591e6 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 8 Feb 2019 14:29:34 -0800 Subject: [PATCH 02/14] Added a sample --- .../AveragedPerceptron.cs | 61 +++++++++++++++++++ src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 21 +++++++ .../SamplesDatasetUtils.cs | 31 ++++++++++ .../StandardLearnersCatalog.cs | 9 ++- 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs create mode 100644 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs new file mode 100644 index 0000000000..8871711b61 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs @@ -0,0 +1,61 @@ +using Microsoft.ML; + +namespace Microsoft.ML.Samples.Dynamic.BinaryClassification +{ + public static class AveragedPerceptron + { + public static void Example() + { + // In this examples we will use the adult income dataset. 
The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this examples to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Download the dataset and load it as IDataView + var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Create data processing pipeline + var pipeline = + // Convert categorical features to one-hot vectors + mlContext.Transforms.Categorical.OneHotEncoding("workclass") + .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) + // Combine all features into one feature vector + .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", + "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", + "capital-gain", "capital-loss", "hours-per-week")) + // Min-max normalized all the features + .Append(mlContext.Transforms.Normalize("Features")) + // Add the trainer + .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features")); + + // Fit this pipeline to the training data + var model = 
pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); + + // Output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.91 + // Positive Precision: 0.69 + // Positive Recall: 0.63 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs new file mode 100644 index 0000000000..814a251d6a --- /dev/null +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.Data; + +namespace Microsoft.ML.SamplesUtils +{ + public static class ConsoleUtils + { + public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + { + Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); + Console.WriteLine($"AUC: {metrics.Auc:F2}"); + Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); + Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); + Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); + } + } +} diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index edaf2d55c5..724f4d10f8 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -81,6 +81,37 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => 
Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + public static IDataView LoadAdultDataset(MLContext mlContext) + { + // Download the file + string dataFile = DownloadAdultDataset(); + + // Define the columns to read + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("age", DataKind.R4, 0), + new TextLoader.Column("workclass", DataKind.TX, 1), + new TextLoader.Column("fnlwgt", DataKind.R4, 2), + new TextLoader.Column("education", DataKind.TX, 3), + new TextLoader.Column("education-num", DataKind.R4, 4), + new TextLoader.Column("marital-status", DataKind.TX, 5), + new TextLoader.Column("occupation", DataKind.TX, 6), + new TextLoader.Column("relationship", DataKind.TX, 7), + new TextLoader.Column("ethnicity", DataKind.TX, 8), + new TextLoader.Column("sex", DataKind.TX, 9), + new TextLoader.Column("capital-gain", DataKind.R4, 10), + new TextLoader.Column("capital-loss", DataKind.R4, 11), + new TextLoader.Column("hours-per-week", DataKind.R4, 12), + new TextLoader.Column("native-country", DataKind.R4, 13), + new TextLoader.Column("IsOver50K", DataKind.BL, 14), + }, + separatorChar: ',', + hasHeader: true + ); + + return reader.Read(dataFile); + } /// /// Downloads the breast cancer dataset from the ML.NET repo. /// diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index dd0ffc76c0..01bbc7f60a 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -223,6 +223,13 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// /// L2 weight for regularization. /// Number of training iterations through the data. 
+ /// + /// + /// + /// + /// public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumn = DefaultColumnNames.Label, @@ -242,7 +249,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options. - /// For trainer details, please see the remarks for + /// For usage details, please see /// /// The binary classification catalog trainer object. /// Advanced trainer options. From f4f03ba7101f16c231eecd58272197278be2a3d8 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 11:23:53 -0800 Subject: [PATCH 03/14] Addressed PR comments --- .../AveragedPerceptron.cs | 61 ------------------- .../AveragedPerceptron.cs | 45 ++++++++++++++ src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 9 ++- .../Microsoft.ML.SamplesUtils.csproj | 2 + .../SamplesDatasetUtils.cs | 25 +++++++- .../Standard/Online/AveragedLinear.cs | 34 ++++++++--- .../Standard/Online/AveragedPerceptron.cs | 12 ++-- .../Standard/Online/LinearSvm.cs | 2 +- .../Standard/Online/OnlineGradientDescent.cs | 2 +- .../Standard/Online/OnlineLinear.cs | 33 +++++----- .../StandardLearnersCatalog.cs | 13 ++-- .../TestPredictors.cs | 2 +- .../Scenarios/Api/TestApi.cs | 2 +- test/Microsoft.ML.Tests/Scenarios/OvaTest.cs | 2 +- 14 files changed, 137 insertions(+), 107 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs deleted file mode 100644 index 8871711b61..0000000000 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs +++ /dev/null @@ -1,61 +0,0 @@ -using Microsoft.ML; - -namespace Microsoft.ML.Samples.Dynamic.BinaryClassification -{ - public static class AveragedPerceptron - { - public static void Example() - { - // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult - - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this examples to make outputs deterministic. - var mlContext = new MLContext(seed: 0); - - // Download the dataset and load it as IDataView - var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); - - // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); - - // Create data processing pipeline - var pipeline = - // Convert categorical features to one-hot vectors - mlContext.Transforms.Categorical.OneHotEncoding("workclass") - .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) - // Combine all features into one feature vector - .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", - "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
- "capital-gain", "capital-loss", "hours-per-week")) - // Min-max normalized all the features - .Append(mlContext.Transforms.Normalize("Features")) - // Add the trainer - .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features")); - - // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); - - // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); - SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); - - // Output: - // Accuracy: 0.85 - // AUC: 0.90 - // F1 Score: 0.66 - // Negative Precision: 0.89 - // Negative Recall: 0.91 - // Positive Precision: 0.69 - // Positive Recall: 0.63 - } - } -} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs new file mode 100644 index 0000000000..35bdefa434 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -0,0 +1,45 @@ +using Microsoft.ML; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class AveragedPerceptron + { + public static void Example() + { + // In this examples we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Create data training pipeline + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.91 + // Positive Precision: 0.69 + // Positive Recall: 0.63 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs index 814a251d6a..83fafd8658 100644 --- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -5,9 +5,16 @@ namespace Microsoft.ML.SamplesUtils { + /// + /// Utilities for creating console outputs in samples' code. + /// public static class ConsoleUtils { - public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + /// + /// Pretty-print BinaryClassificationMetrics objects. + /// + /// Binary classification metrics. 
+ public static void PrintMetrics(BinaryClassificationMetrics metrics) { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.Auc:F2}"); diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index e4d6c5d504..0bdb047d42 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -6,7 +6,9 @@ + + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 724f4d10f8..e6f45c0eeb 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -7,6 +7,7 @@ using System.IO; using System.Net; using Microsoft.Data.DataView; +using Microsoft.ML; using Microsoft.ML.Data; namespace Microsoft.ML.SamplesUtils @@ -81,7 +82,7 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); - public static IDataView LoadAdultDataset(MLContext mlContext) + public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) { // Download the file string dataFile = DownloadAdultDataset(); @@ -110,8 +111,28 @@ public static IDataView LoadAdultDataset(MLContext mlContext) hasHeader: true ); - return reader.Read(dataFile); + // Create data featurizing pipeline + var pipeline = + // Convert categorical features to one-hot vectors + mlContext.Transforms.Categorical.OneHotEncoding("workclass") + .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) + 
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) + // Combine all features into one feature vector + .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", + "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", + "capital-gain", "capital-loss", "hours-per-week")) + // Min-max normalize all the features + .Append(mlContext.Transforms.Normalize("Features")); + + var data = reader.Read(dataFile); + var featurizedData = pipeline.Fit(data).Transform(data); + return featurizedData; } + /// /// Downloads the breast cancer dataset from the ML.NET repo. /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index 0a73a0ac3b..6460dcc48e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -19,7 +19,7 @@ namespace Microsoft.ML.Trainers.Online public abstract class AveragedLinearArguments : OnlineLinearArguments { /// - /// Learning rate + /// Learning rate. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)] [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")] @@ -27,9 +27,12 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments public float LearningRate = AveragedDefaultArgs.LearningRate; /// - /// to decrease the learning rate as iterations progress; otherwise, . - /// Default is . /// + /// Determines whether to decrease the or not. /// + /// + /// to decrease the as iterations progress; otherwise, . + /// Default is .
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)] [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")] [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })] @@ -37,16 +40,21 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// /// Number of examples after which weights will be reset to the current average. - /// Default is , which disables this feature. /// + /// + /// Default is , which disables this feature. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")] public long? ResetWeightsAfterXExamples = null; /// + /// Determines when to update averaged weights. + /// + /// /// to update averaged weights only when loss is nonzero. /// to update averaged weights on every example. /// Default is . - /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")] public bool DoLazyUpdates = true; @@ -60,23 +68,31 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// /// Extra weight given to more recent updates. - /// Default is 0, i.e. no extra gain. /// + /// + /// Default is 0, i.e. no extra gain. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")] public float RecencyGain = 0; /// + /// Determines whether is multiplicative or additive. + /// + /// /// means is multiplicative. /// means is additive. /// Default is . - /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")] public bool RecencyGainMulti = false; /// + /// Determines whether to do averaging or not. + /// + /// /// to do averaging; otherwise, . /// Default is . 
- /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")] public bool Averaged = true; @@ -84,7 +100,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// The inexactness tolerance for averaging. /// [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")] - public float AveragedTolerance = (float)1e-2; + internal float AveragedTolerance = (float)1e-2; [BestFriend] internal class AveragedDefaultArgs : OnlineDefaultArgs diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 2e82f8af5e..8de643f9d3 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -27,8 +27,10 @@ namespace Microsoft.ML.Trainers.Online { /// /// This is averaged perceptron trainer. - /// For usage details, please see /// + /// + /// For usage details, please see + /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { public const string LoadNameValue = "AveragedPerceptron"; @@ -41,7 +43,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer - /// The custom loss. Default is hinge loss. + /// The custom loss. /// [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)] public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments(); @@ -108,9 +110,9 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options) /// The name of the feature column. /// The optional name of the weights column. /// The learning rate. - /// Wheather to decrease learning rate as iterations progress. + /// Whether to decrease learning rate as iterations progress. /// L2 Regularization Weight. - /// The number of training iteraitons. 
+ /// The number of training iterations. internal AveragedPerceptronTrainer(IHostEnvironment env, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, @@ -128,7 +130,7 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, - NumIterations = numIterations, + NumberOfIterations = numIterations, LossFunction = new TrivialFactory(lossFunction ?? new HingeLoss()) }) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index 184a6554aa..a214ce9505 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -240,7 +240,7 @@ internal LinearSvmTrainer(IHostEnvironment env, LabelColumn = labelColumn, FeatureColumn = featureColumn, InitialWeights = weightsColumn, - NumIterations = numIterations, + NumberOfIterations = numIterations, }) { } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index 39ed31a6ec..4c481904c3 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -115,7 +115,7 @@ internal OnlineGradientDescentTrainer(IHostEnvironment env, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, - NumIterations = numIterations, + NumberOfIterations = numIterations, LabelColumn = labelColumn, FeatureColumn = featureColumn, InitialWeights = weightsColumn, diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index 4e1cb6e341..6ba61f77f5 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -26,7 +26,7 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] - public int NumIterations = OnlineDefaultArgs.NumIterations; + public int NumberOfIterations = OnlineDefaultArgs.NumIterations; /// /// Initial weights and bias, comma-separated. @@ -36,12 +36,16 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel public string InitialWeights; /// - /// Initial weights scale. + /// Initial weights and bias scale. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)] + /// + /// This property is only used if the provided value is positive and is not specified. + /// The weights and bias will be randomly selected from InitialWeights * [-0.5,0.5] interval with uniform distribution. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)] [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")] [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)] - public float InitWtsDiameter = 0; + public float InitialWeightsDiameter = 0; /// /// to shuffle data for each training iteration; otherwise, . @@ -51,12 +55,6 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle = true; - /// - /// Size of cache when trained in Scope. 
- /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")] - public int StreamingCacheSize = 1000000; - [BestFriend] internal class OnlineDefaultArgs { @@ -151,13 +149,13 @@ protected TrainStateBase(IChannel ch, int numFeatures, LinearModelParameters pre Weights = new VBuffer(numFeatures, weightValues); Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture); } - else if (parent.Args.InitWtsDiameter > 0) + else if (parent.Args.InitialWeightsDiameter > 0) { var weightValues = new float[numFeatures]; for (int i = 0; i < numFeatures; i++) - weightValues[i] = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); + weightValues[i] = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); Weights = new VBuffer(numFeatures, weightValues); - Bias = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); + Bias = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); } else if (numFeatures <= 1000) Weights = VBufferUtils.CreateDense(numFeatures); @@ -255,9 +253,8 @@ private protected OnlineLinearTrainer(OnlineLinearArguments args, IHostEnvironme : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(args.InitialWeights)) { Contracts.CheckValue(args, nameof(args)); - Contracts.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), UserErrorPositive); - Contracts.CheckUserArg(args.InitWtsDiameter >= 0, nameof(args.InitWtsDiameter), UserErrorNonNegative); - Contracts.CheckUserArg(args.StreamingCacheSize > 0, nameof(args.StreamingCacheSize), UserErrorPositive); + Contracts.CheckUserArg(args.NumberOfIterations > 0, nameof(args.NumberOfIterations), UserErrorPositive); + Contracts.CheckUserArg(args.InitialWeightsDiameter >= 0, nameof(args.InitialWeightsDiameter), UserErrorNonNegative); Args = args; Name = 
name; @@ -307,7 +304,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state) var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt); long numBad = 0; - while (state.Iteration < Args.NumIterations) + while (state.Iteration < Args.NumberOfIterations) { state.BeginIteration(ch); @@ -325,7 +322,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state) { ch.Warning( "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)", - numBad, Args.NumIterations, numBad / Args.NumIterations); + numBad, Args.NumberOfIterations, numBad / Args.NumberOfIterations); } } diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 01bbc7f60a..0fe34e2b2e 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -194,11 +194,12 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// Predict a target using a linear binary classification model trained with averaged perceptron trainer. /// /// - /// Perceptron is a classification algorithm that makes its predictions based on a linear function. - /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. + /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. + /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into. + /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. /// - /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. 
- /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. + /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time. + /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed. /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs, /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, /// multiplied by a factor 0 < a <= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, @@ -218,7 +219,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// The optional example weights. /// Learning rate. /// - /// to decrease the learning rate as iterations progress; otherwise, . + /// to decrease the as iterations progress; otherwise, . /// Default is . /// /// L2 weight for regularization. @@ -252,7 +253,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// For usage details, please see /// /// The binary classification catalog trainer object. - /// Advanced trainer options. + /// Trainer options. 
public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 339bede3e5..8d523ade17 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -748,7 +748,7 @@ public void TestEnsembleCombiner() { FeatureColumn = "Features", LabelColumn = DefaultColumnNames.Label, - NumIterations = 2, + NumberOfIterations = 2, TrainingData = dataView, NormalizeFeatures = NormalizeOption.No }).PredictorModel, diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs index bc43d688fb..c762401e74 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs @@ -182,7 +182,7 @@ public void TrainAveragedPerceptronWithCache() var cached = mlContext.Data.Cache(xf); var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron( - new AveragedPerceptronTrainer.Options { NumIterations = 2 }); + new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 }); estimator.Fit(cached).Transform(cached); diff --git a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs index ecea5411a6..c708bb95b9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs +++ b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs @@ -133,7 +133,7 @@ public void OvaLinearSvm() // Pipeline var pipeline = mlContext.MulticlassClassification.Trainers.OneVersusAll( - mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumIterations = 100 }), + mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumberOfIterations = 100 }), useProbabilities: false); var model = 
pipeline.Fit(data); From ceb3aa299b6971b6c40e337facf3a286e0210f04 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 11:47:15 -0800 Subject: [PATCH 04/14] Added sample for the second overload with trainer options. --- .../AveragedPerceptronWithOptions.cs | 58 +++++++++++++++++++ .../StandardLearnersCatalog.cs | 7 +++ 2 files changed, 65 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs new file mode 100644 index 0000000000..eaf8066398 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -0,0 +1,58 @@ +using Microsoft.ML; +using Microsoft.ML.Trainers.Online; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class AveragedPerceptronWithOptions + { + public static void Example() + { + // In this example we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Define the trainer options + var options = new AveragedPerceptronTrainer.Options() + { + LossFunction = new SmoothedHingeLoss.Arguments(), + LearningRate = 0.1f, + DoLazyUpdates = false, + RecencyGain = 0.1f, + NumberOfIterations = 10, + LabelColumn = "IsOver50K", + FeatureColumn = "Features" + }; + + // Create data training pipeline + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output: + // Accuracy: 0.86 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.93 + // Positive Precision: 0.72 + // Positive Recall: 0.61 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 0fe34e2b2e..d190a3509c 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -254,6 +254,13 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// /// The binary classification catalog trainer object. /// Trainer options. 
+ /// + /// + /// + /// + /// public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { From 2b96f6daea01b09dc894b30f7b04f93042c13297 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 14:20:19 -0800 Subject: [PATCH 05/14] Fixed the failing tests --- .../Common/EntryPoints/core_manifest.json | 57 ++++--------------- .../UnitTests/TestEntryPoints.cs | 5 +- 2 files changed, 14 insertions(+), 48 deletions(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 5154957bf1..5d720ce6de 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4306,7 +4306,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -4325,11 +4325,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -4467,18 +4468,6 @@ true ] } - }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": "Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 } ], "Outputs": [ @@ -13247,7 +13236,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -13266,11 +13255,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -13353,18 +13343,6 @@ ] } }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": 
"Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 - }, { "Name": "BatchSize", "Type": "Int", @@ -14272,7 +14250,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -14291,11 +14269,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -14410,18 +14389,6 @@ true ] } - }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": "Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 } ], "Outputs": [ diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 8fcb00842e..7035791227 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -5448,11 +5448,10 @@ public void TestOvaMacroWithUncalibratedLearner() 'RecencyGainMulti': false, 'Averaged': true, 'AveragedTolerance': 0.01, - 'NumIterations': 1, + 'NumberOfIterations': 1, 'InitialWeights': null, - 'InitWtsDiameter': 0.0, + 'InitialWeightsDiameter': 0.0, 'Shuffle': false, - 'StreamingCacheSize': 1000000, 'LabelColumn': 'Label', 'TrainingData': '$Var_9ccc8bce4f6540eb8a244ab40585602a', 'FeatureColumn': 'Features', From 87be8dc40107662221f7852c181a26c8d58e3a9d Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Thu, 7 Feb 2019 16:00:25 -0800 Subject: [PATCH 06/14] Updated docs for AveragedPerceptron --- .../Standard/Online/AveragedLinear.cs | 35 +++++++++++++++++ .../Standard/Online/AveragedPerceptron.cs | 19 ++++++--- .../Standard/Online/OnlineLinear.cs | 16 ++++++++ .../Standard/Online/doc.xml | 39 
------------------- .../StandardLearnersCatalog.cs | 36 +++++++++++++---- 5 files changed, 93 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index 85da68d1f9..a0f156fb74 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -17,36 +17,71 @@ namespace Microsoft.ML.Trainers.Online { public abstract class AveragedLinearArguments : OnlineLinearArguments { + /// + /// Learning rate + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)] [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")] [TlcModule.SweepableDiscreteParam("LearningRate", new object[] { 0.01, 0.1, 0.5, 1.0 })] public float LearningRate = AveragedDefaultArgs.LearningRate; + /// + /// to decrease the learning rate as iterations progress; otherwise, . + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)] [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")] [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })] public bool DecreaseLearningRate = AveragedDefaultArgs.DecreaseLearningRate; + /// + /// Number of examples after which weights will be reset to the current average. + /// Default is , which disables this feature. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")] public long? ResetWeightsAfterXExamples = null; + /// + /// to update averaged weights only when loss is nonzero. + /// to update averaged weights on every example. + /// Default is . 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")] public bool DoLazyUpdates = true; + /// + /// L2 weight for regularization. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)] [TGUI(Label = "L2 Regularization Weight")] [TlcModule.SweepableFloatParam("L2RegularizerWeight", 0.0f, 0.4f)] public float L2RegularizerWeight = AveragedDefaultArgs.L2RegularizerWeight; + /// + /// Extra weight given to more recent updates. + /// Default is 0, i.e. no extra gain. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")] public float RecencyGain = 0; + /// + /// means is multiplicative. + /// means is additive. + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")] public bool RecencyGainMulti = false; + /// + /// to do averaging; otherwise, . + /// Default is . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")] public bool Averaged = true; + /// + /// The inexactness tolerance for averaging. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")] public float AveragedTolerance = (float)1e-2; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index c3610c7dd1..ac72e2aceb 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -24,12 +24,10 @@ namespace Microsoft.ML.Trainers.Online { - // This is an averaged perceptron classifier. - // Configurable subcomponents: - // - Loss function. 
By default, hinge loss (aka max-margin avgd perceptron) - // - Feature normalization. By default, rescaling between min and max values for every feature - // - Prediction calibration to produce probabilities. Off by default, if on, uses exponential (aka Platt) calibration. - /// + /// + /// This is averaged perceptron trainer. + /// For usage details, please see + /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { public const string LoadNameValue = "AveragedPerceptron"; @@ -41,12 +39,21 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer + /// The custom loss. Default is hinge loss. + /// [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)] public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments(); + /// + /// The calibrator for producing probabilities. Default is exponential (aka Platt) calibration. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "The calibrator kind to apply to the predictor. Specify null for no calibration", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] public ICalibratorTrainerFactory Calibrator = new PlattCalibratorTrainerFactory(); + /// + /// The maximum number of examples to use when training the calibrator. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of examples to use when training the calibrator", Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)] public int MaxCalibrationExamples = 1000000; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index b41890a5fa..a1ff85ae01 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -19,24 +19,40 @@ namespace Microsoft.ML.Trainers.Online public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel { + /// + /// Number of training iterations through the data. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumIterations = OnlineDefaultArgs.NumIterations; + /// + /// Initial weights and bias, comma-separated. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Initial Weights and bias, comma-separated", ShortName = "initweights")] [TGUI(NoSweep = true)] public string InitialWeights; + /// + /// Initial weights scale. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)] [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")] [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)] public float InitWtsDiameter = 0; + /// + /// to shuffle data for each training iteration; otherwise, . + /// Default is . 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")] [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle = true; + /// + /// Size of cache when trained in Scope. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")] public int StreamingCacheSize = 1000000; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml index 8e8f5dc2ba..292aeface5 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/doc.xml @@ -25,44 +25,5 @@ - - - - Averaged Perceptron Binary Classifier. - - - Perceptron is a classification algorithm that makes its predictions based on a linear function. - I.e., for an instance with feature values f0, f1,..., f_D-1, , the prediction is given by the sign of sigma[0,D-1] ( w_i * f_i), where w_0, w_1,...,w_D-1 are the weights computed by the algorithm. - - Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. - The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. - If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, - the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, - multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, - and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). 
- - - In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored, - together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not). - The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors. - - For more information see: - Wikipedia entry for Perceptron - Large Margin Classification Using the Perceptron Algorithm - - - - - - new AveragedPerceptronBinaryClassifier - { - NumIterations = 10, - L2RegularizerWeight = 0.01f, - LossFunction = new ExpLossClassificationLossFunction() - } - - - - diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index ae03b46bc8..736d5b1165 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -190,16 +190,37 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla } /// - /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer. + /// Predict a target using a linear binary classification model trained with averaged perceptron trainer. /// + /// + /// Perceptron is a classification algorithm that makes its predictions based on a linear function. + /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. + /// + /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. + /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. 
+ /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs, + /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, + /// multiplied by a factor 0 < a <= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, + /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). + /// + /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored, + /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not). + /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors. + /// + /// For more information see Wikipedia entry for Perceptron + /// or Large Margin Classification Using the Perceptron Algorithm + /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The custom loss. + /// The custom loss. If , hinge loss will be used resulting in max-margin averaged perceptron. /// The optional example weights. - /// The learning Rate. - /// Decrease learning rate as iterations progress. - /// L2 regularization weight. + /// Learning rate. + /// + /// to decrease the learning rate as iterations progress; otherwise, . + /// Default is . + /// + /// L2 weight for regularization. /// Number of training iterations through the data. 
public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, @@ -219,10 +240,11 @@ public static AveragedPerceptronTrainer AveragedPerceptron( } /// - /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer. + /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options. + /// For trainer details, please see the remarks for /// /// The binary classification catalog trainer object. - /// Advanced arguments to the algorithm. + /// Advanced trainer options. public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { From ba0abff0bade3fb33e2c3f1e634fd0c92e2f8acf Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 8 Feb 2019 14:29:34 -0800 Subject: [PATCH 07/14] Added a sample --- .../AveragedPerceptron.cs | 61 +++++++++++++++++++ src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 21 +++++++ .../SamplesDatasetUtils.cs | 31 ++++++++++ .../StandardLearnersCatalog.cs | 9 ++- 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs create mode 100644 src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs new file mode 100644 index 0000000000..8871711b61 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs @@ -0,0 +1,61 @@ +using Microsoft.ML; + +namespace Microsoft.ML.Samples.Dynamic.BinaryClassification +{ + public static class AveragedPerceptron + { + public static void Example() + { + // In this examples we will use the adult income dataset. 
The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this examples to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Download the dataset and load it as IDataView + var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Create data processing pipeline + var pipeline = + // Convert categorical features to one-hot vectors + mlContext.Transforms.Categorical.OneHotEncoding("workclass") + .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) + // Combine all features into one feature vector + .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", + "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", + "capital-gain", "capital-loss", "hours-per-week")) + // Min-max normalized all the features + .Append(mlContext.Transforms.Normalize("Features")) + // Add the trainer + .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features")); + + // Fit this pipeline to the training data + var model = 
pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); + + // Output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.91 + // Positive Precision: 0.69 + // Positive Recall: 0.63 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs new file mode 100644 index 0000000000..814a251d6a --- /dev/null +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.Data; + +namespace Microsoft.ML.SamplesUtils +{ + public static class ConsoleUtils + { + public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + { + Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); + Console.WriteLine($"AUC: {metrics.Auc:F2}"); + Console.WriteLine($"F1 Score: {metrics.F1Score:F2}"); + Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}"); + Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}"); + Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}"); + Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}"); + } + } +} diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 17ce2e3ab7..e3969ba5e3 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -86,6 +86,37 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => 
Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + public static IDataView LoadAdultDataset(MLContext mlContext) + { + // Download the file + string dataFile = DownloadAdultDataset(); + + // Define the columns to read + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("age", DataKind.R4, 0), + new TextLoader.Column("workclass", DataKind.TX, 1), + new TextLoader.Column("fnlwgt", DataKind.R4, 2), + new TextLoader.Column("education", DataKind.TX, 3), + new TextLoader.Column("education-num", DataKind.R4, 4), + new TextLoader.Column("marital-status", DataKind.TX, 5), + new TextLoader.Column("occupation", DataKind.TX, 6), + new TextLoader.Column("relationship", DataKind.TX, 7), + new TextLoader.Column("ethnicity", DataKind.TX, 8), + new TextLoader.Column("sex", DataKind.TX, 9), + new TextLoader.Column("capital-gain", DataKind.R4, 10), + new TextLoader.Column("capital-loss", DataKind.R4, 11), + new TextLoader.Column("hours-per-week", DataKind.R4, 12), + new TextLoader.Column("native-country", DataKind.R4, 13), + new TextLoader.Column("IsOver50K", DataKind.BL, 14), + }, + separatorChar: ',', + hasHeader: true + ); + + return reader.Read(dataFile); + } /// /// Downloads the breast cancer dataset from the ML.NET repo. /// diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 736d5b1165..f614c4098c 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -222,6 +222,13 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// /// L2 weight for regularization. /// Number of training iterations through the data. 
+ /// + /// + /// + /// + /// public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string labelColumn = DefaultColumnNames.Label, @@ -241,7 +248,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options. - /// For trainer details, please see the remarks for + /// For usage details, please see /// /// The binary classification catalog trainer object. /// Advanced trainer options. From a5538edea7faaaf2fa93c76c854c366550949c24 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 11:23:53 -0800 Subject: [PATCH 08/14] Addressed PR comments --- .../AveragedPerceptron.cs | 61 ------------------- .../AveragedPerceptron.cs | 45 ++++++++++++++ src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs | 9 ++- .../Microsoft.ML.SamplesUtils.csproj | 2 + .../SamplesDatasetUtils.cs | 25 +++++++- .../Standard/Online/AveragedLinear.cs | 34 ++++++++--- .../Standard/Online/AveragedPerceptron.cs | 12 ++-- .../Standard/Online/LinearSvm.cs | 2 +- .../Standard/Online/OnlineGradientDescent.cs | 2 +- .../Standard/Online/OnlineLinear.cs | 33 +++++----- .../StandardLearnersCatalog.cs | 13 ++-- .../TestPredictors.cs | 2 +- .../Scenarios/Api/TestApi.cs | 2 +- test/Microsoft.ML.Tests/Scenarios/OvaTest.cs | 2 +- 14 files changed, 137 insertions(+), 107 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs deleted file mode 100644 index 8871711b61..0000000000 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/BinaryClassification/AveragedPerceptron.cs +++ /dev/null @@ -1,61 +0,0 @@ -using Microsoft.ML; - -namespace Microsoft.ML.Samples.Dynamic.BinaryClassification -{ - public static class AveragedPerceptron - { - public static void Example() - { - // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult - - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this examples to make outputs deterministic. - var mlContext = new MLContext(seed: 0); - - // Download the dataset and load it as IDataView - var data = SamplesUtils.DatasetUtils.LoadAdultDataset(mlContext); - - // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); - - // Create data processing pipeline - var pipeline = - // Convert categorical features to one-hot vectors - mlContext.Transforms.Categorical.OneHotEncoding("workclass") - .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) - .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) - // Combine all features into one feature vector - .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", - "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", 
- "capital-gain", "capital-loss", "hours-per-week")) - // Min-max normalized all the features - .Append(mlContext.Transforms.Normalize("Features")) - // Add the trainer - .Append(mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features")); - - // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); - - // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); - SamplesUtils.ConsoleUtils.PrintBinaryClassificationMetrics(metrics); - - // Output: - // Accuracy: 0.85 - // AUC: 0.90 - // F1 Score: 0.66 - // Negative Precision: 0.89 - // Negative Recall: 0.91 - // Positive Precision: 0.69 - // Positive Recall: 0.63 - } - } -} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs new file mode 100644 index 0000000000..35bdefa434 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -0,0 +1,45 @@ +using Microsoft.ML; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class AveragedPerceptron + { + public static void Example() + { + // In this examples we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Create data training pipeline + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output: + // Accuracy: 0.85 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.91 + // Positive Precision: 0.69 + // Positive Recall: 0.63 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs index 814a251d6a..83fafd8658 100644 --- a/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/ConsoleUtils.cs @@ -5,9 +5,16 @@ namespace Microsoft.ML.SamplesUtils { + /// + /// Utilities for creating console outputs in samples' code. + /// public static class ConsoleUtils { - public static void PrintBinaryClassificationMetrics(BinaryClassificationMetrics metrics) + /// + /// Pretty-print BinaryClassificationMetrics objects. + /// + /// Binary classification metrics. 
+ public static void PrintMetrics(BinaryClassificationMetrics metrics) { Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}"); Console.WriteLine($"AUC: {metrics.Auc:F2}"); diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index e4d6c5d504..0bdb047d42 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -6,7 +6,9 @@ + + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index e3969ba5e3..918cd26a9d 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -7,6 +7,7 @@ using System.IO; using System.Net; using Microsoft.Data.DataView; +using Microsoft.ML; using Microsoft.ML.Data; namespace Microsoft.ML.SamplesUtils @@ -86,7 +87,7 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); - public static IDataView LoadAdultDataset(MLContext mlContext) + public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) { // Download the file string dataFile = DownloadAdultDataset(); @@ -115,8 +116,28 @@ public static IDataView LoadAdultDataset(MLContext mlContext) hasHeader: true ); - return reader.Read(dataFile); + // Create data featurizing pipeline + var pipeline = + // Convert categorical features to one-hot vectors + mlContext.Transforms.Categorical.OneHotEncoding("workclass") + .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) + 
.Append(mlContext.Transforms.Categorical.OneHotEncoding("relationship")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("ethnicity")) + .Append(mlContext.Transforms.Categorical.OneHotEncoding("native-country")) + // Combine all features into one feature vector + .Append(mlContext.Transforms.Concatenate("Features", "workclass", "education", "marital-status", + "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", + "capital-gain", "capital-loss", "hours-per-week")) + // Min-max normalized all the features + .Append(mlContext.Transforms.Normalize("Features")); + + var data = reader.Read(dataFile); + var featurizedData = pipeline.Fit(data).Transform(data); + return featurizedData; } + /// /// Downloads the breast cancer dataset from the ML.NET repo. /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index a0f156fb74..9234cd2df4 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -18,7 +18,7 @@ namespace Microsoft.ML.Trainers.Online public abstract class AveragedLinearArguments : OnlineLinearArguments { /// - /// Learning rate + /// Learning rate. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate", ShortName = "lr", SortOrder = 50)] [TGUI(Label = "Learning rate", SuggestedSweeps = "0.01,0.1,0.5,1.0")] @@ -26,9 +26,12 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments public float LearningRate = AveragedDefaultArgs.LearningRate; /// - /// to decrease the learning rate as iterations progress; otherwise, . - /// Default is . + /// Determine whether to decrease the or not. /// + /// + /// to decrease the as iterations progress; otherwise, . + /// Default is . 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)] [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")] [TlcModule.SweepableDiscreteParam("DecreaseLearningRate", new object[] { false, true })] @@ -36,16 +39,21 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// /// Number of examples after which weights will be reset to the current average. - /// Default is , which disables this feature. /// + /// + /// Default is , which disables this feature. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of examples after which weights will be reset to the current average", ShortName = "numreset")] public long? ResetWeightsAfterXExamples = null; /// + /// Determines when to update averaged weights. + /// + /// /// to update averaged weights only when loss is nonzero. /// to update averaged weights on every example. /// Default is . - /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")] public bool DoLazyUpdates = true; @@ -59,23 +67,31 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// /// Extra weight given to more recent updates. - /// Default is 0, i.e. no extra gain. /// + /// + /// Default is 0, i.e. no extra gain. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Extra weight given to more recent updates", ShortName = "rg")] public float RecencyGain = 0; /// + /// Determines whether is multiplicative or additive. + /// + /// /// means is multiplicative. /// means is additive. /// Default is . - /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")] public bool RecencyGainMulti = false; /// + /// Determines whether to do averaging or not. + /// + /// /// to do averaging; otherwise, . /// Default is . 
- /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Do averaging?", ShortName = "avg")] public bool Averaged = true; @@ -83,7 +99,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// The inexactness tolerance for averaging. /// [Argument(ArgumentType.AtMostOnce, HelpText = "The inexactness tolerance for averaging", ShortName = "avgtol")] - public float AveragedTolerance = (float)1e-2; + internal float AveragedTolerance = (float)1e-2; [BestFriend] internal class AveragedDefaultArgs : OnlineDefaultArgs diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index ac72e2aceb..d3dbdf619e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -26,8 +26,10 @@ namespace Microsoft.ML.Trainers.Online { /// /// This is averaged perceptron trainer. - /// For usage details, please see /// + /// + /// For usage details, please see + /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { public const string LoadNameValue = "AveragedPerceptron"; @@ -40,7 +42,7 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer - /// The custom loss. Default is hinge loss. + /// The custom loss. /// [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)] public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments(); @@ -107,9 +109,9 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options) /// The name of the feature column. /// The optional name of the weights column. /// The learning rate. - /// Wheather to decrease learning rate as iterations progress. + /// Whether to decrease learning rate as iterations progress. /// L2 Regularization Weight. - /// The number of training iteraitons. 
+ /// The number of training iterations. internal AveragedPerceptronTrainer(IHostEnvironment env, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, @@ -127,7 +129,7 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, - NumIterations = numIterations, + NumberOfIterations = numIterations, LossFunction = new TrivialFactory(lossFunction ?? new HingeLoss()) }) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index e1382b3038..2cd75e623c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -239,7 +239,7 @@ internal LinearSvmTrainer(IHostEnvironment env, LabelColumn = labelColumn, FeatureColumn = featureColumn, InitialWeights = weightsColumn, - NumIterations = numIterations, + NumberOfIterations = numIterations, }) { } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index 8982127170..b683aefb07 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -114,7 +114,7 @@ internal OnlineGradientDescentTrainer(IHostEnvironment env, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, - NumIterations = numIterations, + NumberOfIterations = numIterations, LabelColumn = labelColumn, FeatureColumn = featureColumn, InitialWeights = weightsColumn, diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index a1ff85ae01..4dbdc8da57 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -25,7 +25,7 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] - public int NumIterations = OnlineDefaultArgs.NumIterations; + public int NumberOfIterations = OnlineDefaultArgs.NumIterations; /// /// Initial weights and bias, comma-separated. @@ -35,12 +35,16 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel public string InitialWeights; /// - /// Initial weights scale. + /// Initial weights and bias scale. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts", SortOrder = 140)] + /// + /// This property is only used if the provided value is positive and is not specified. + /// The weights and bias will be randomly selected from InitialWeights * [-0.5,0.5] interval with uniform distribution. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Init weights diameter", ShortName = "initwts, initWtsDiameter", SortOrder = 140)] [TGUI(Label = "Initial Weights Scale", SuggestedSweeps = "0,0.1,0.5,1")] [TlcModule.SweepableFloatParamAttribute("InitWtsDiameter", 0.0f, 1.0f, numSteps: 5)] - public float InitWtsDiameter = 0; + public float InitialWeightsDiameter = 0; /// /// to shuffle data for each training iteration; otherwise, . @@ -50,12 +54,6 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle = true; - /// - /// Size of cache when trained in Scope. 
- /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Size of cache when trained in Scope", ShortName = "cache")] - public int StreamingCacheSize = 1000000; - [BestFriend] internal class OnlineDefaultArgs { @@ -150,13 +148,13 @@ protected TrainStateBase(IChannel ch, int numFeatures, LinearModelParameters pre Weights = new VBuffer(numFeatures, weightValues); Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture); } - else if (parent.Args.InitWtsDiameter > 0) + else if (parent.Args.InitialWeightsDiameter > 0) { var weightValues = new float[numFeatures]; for (int i = 0; i < numFeatures; i++) - weightValues[i] = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); + weightValues[i] = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); Weights = new VBuffer(numFeatures, weightValues); - Bias = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); + Bias = parent.Args.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5); } else if (numFeatures <= 1000) Weights = VBufferUtils.CreateDense(numFeatures); @@ -254,9 +252,8 @@ private protected OnlineLinearTrainer(OnlineLinearArguments args, IHostEnvironme : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(args.InitialWeights)) { Contracts.CheckValue(args, nameof(args)); - Contracts.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), UserErrorPositive); - Contracts.CheckUserArg(args.InitWtsDiameter >= 0, nameof(args.InitWtsDiameter), UserErrorNonNegative); - Contracts.CheckUserArg(args.StreamingCacheSize > 0, nameof(args.StreamingCacheSize), UserErrorPositive); + Contracts.CheckUserArg(args.NumberOfIterations > 0, nameof(args.NumberOfIterations), UserErrorPositive); + Contracts.CheckUserArg(args.InitialWeightsDiameter >= 0, nameof(args.InitialWeightsDiameter), UserErrorNonNegative); Args = args; Name = 
name; @@ -315,7 +312,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state) var cursorFactory = new FloatLabelCursor.Factory(data, cursorOpt); long numBad = 0; - while (state.Iteration < Args.NumIterations) + while (state.Iteration < Args.NumberOfIterations) { state.BeginIteration(ch); @@ -333,7 +330,7 @@ private void TrainCore(IChannel ch, RoleMappedData data, TrainStateBase state) { ch.Warning( "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)", - numBad, Args.NumIterations, numBad / Args.NumIterations); + numBad, Args.NumberOfIterations, numBad / Args.NumberOfIterations); } } diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index f614c4098c..047ea48440 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -193,11 +193,12 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// Predict a target using a linear binary classification model trained with averaged perceptron trainer. /// /// - /// Perceptron is a classification algorithm that makes its predictions based on a linear function. - /// For instance with feature values f0, f1,..., f_D-1, the prediction is given by the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. + /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. + /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into. + /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. /// - /// Perceptron is an online algorithm, i.e., it processes the instances in the training set one at a time. 
- /// The weights are initialized to be 0, or some random values. Then, for each example in the training set, the value of sigma[0, D-1] (w_i * f_i) is computed. + /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time. + /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed. /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs, /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, /// multiplied by a factor 0 < a <= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, @@ -217,7 +218,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// The optional example weights. /// Learning rate. /// - /// to decrease the learning rate as iterations progress; otherwise, . + /// to decrease the as iterations progress; otherwise, . /// Default is . /// /// L2 weight for regularization. @@ -251,7 +252,7 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// For usage details, please see /// /// The binary classification catalog trainer object. - /// Advanced trainer options. + /// Trainer options. 
public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 464fd0dc59..f509c71908 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -744,7 +744,7 @@ public void TestEnsembleCombiner() { FeatureColumn = "Features", LabelColumn = DefaultColumnNames.Label, - NumIterations = 2, + NumberOfIterations = 2, TrainingData = dataView, NormalizeFeatures = NormalizeOption.No }).PredictorModel, diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs index 4b7fdbfaff..ac54587b65 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs @@ -182,7 +182,7 @@ public void TrainAveragedPerceptronWithCache() var cached = mlContext.Data.Cache(xf); var estimator = mlContext.BinaryClassification.Trainers.AveragedPerceptron( - new AveragedPerceptronTrainer.Options { NumIterations = 2 }); + new AveragedPerceptronTrainer.Options { NumberOfIterations = 2 }); estimator.Fit(cached).Transform(cached); diff --git a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs index 1ef0cda99c..9954669bd3 100644 --- a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs +++ b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs @@ -131,7 +131,7 @@ public void OvaLinearSvm() // Pipeline var pipeline = mlContext.MulticlassClassification.Trainers.OneVersusAll( - mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumIterations = 100 }), + mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(new LinearSvmTrainer.Options { NumberOfIterations = 100 }), useProbabilities: false); var model = 
pipeline.Fit(data); From 6b5606547497b3bb4a5e81a55523c9e4a41557a9 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 11:47:15 -0800 Subject: [PATCH 09/14] Added sample for the second overload with trainer options. --- .../AveragedPerceptronWithOptions.cs | 58 +++++++++++++++++++ .../StandardLearnersCatalog.cs | 7 +++ 2 files changed, 65 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs new file mode 100644 index 0000000000..eaf8066398 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -0,0 +1,58 @@ +using Microsoft.ML; +using Microsoft.ML.Trainers.Online; + +namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +{ + public static class AveragedPerceptronWithOptions + { + public static void Example() + { + // In this examples we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. 
+ var mlContext = new MLContext(seed: 0); + + // Download and featurize the dataset + var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Leave out 10% of data for testing + var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + + // Define the trainer options + var options = new AveragedPerceptronTrainer.Options() + { + LossFunction = new SmoothedHingeLoss.Arguments(), + LearningRate = 0.1f, + DoLazyUpdates = false, + RecencyGain = 0.1f, + NumberOfIterations = 10, + LabelColumn = "IsOver50K", + FeatureColumn = "Features" + }; + + // Create data training pipeline + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options); + + // Fit this pipeline to the training data + var model = pipeline.Fit(trainData); + + // Evaluate how the model is doing on the test data + var dataWithPredictions = model.Transform(testData); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Output: + // Accuracy: 0.86 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.93 + // Positive Precision: 0.72 + // Positive Recall: 0.61 + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 047ea48440..d78464acde 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -253,6 +253,13 @@ public static AveragedPerceptronTrainer AveragedPerceptron( /// /// The binary classification catalog trainer object. /// Trainer options. 
+ /// + /// + /// + /// + /// public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, AveragedPerceptronTrainer.Options options) { From c48773395579b33500591957ecab58e2beaf4fc5 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 14:20:19 -0800 Subject: [PATCH 10/14] Fixed the failing tests --- .../Common/EntryPoints/core_manifest.json | 57 ++++--------------- .../UnitTests/TestEntryPoints.cs | 5 +- 2 files changed, 14 insertions(+), 48 deletions(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 5154957bf1..5d720ce6de 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4306,7 +4306,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -4325,11 +4325,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -4467,18 +4468,6 @@ true ] } - }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": "Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 } ], "Outputs": [ @@ -13247,7 +13236,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -13266,11 +13255,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -13353,18 +13343,6 @@ ] } }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": 
"Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 - }, { "Name": "BatchSize", "Type": "Int", @@ -14272,7 +14250,7 @@ } }, { - "Name": "NumIterations", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations", "Aliases": [ @@ -14291,11 +14269,12 @@ } }, { - "Name": "InitWtsDiameter", + "Name": "InitialWeightsDiameter", "Type": "Float", "Desc": "Init weights diameter", "Aliases": [ - "initwts" + "initwts", + "initWtsDiameter" ], "Required": false, "SortOrder": 140.0, @@ -14410,18 +14389,6 @@ true ] } - }, - { - "Name": "StreamingCacheSize", - "Type": "Int", - "Desc": "Size of cache when trained in Scope", - "Aliases": [ - "cache" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 1000000 } ], "Outputs": [ diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 8b0c46b817..512329a123 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -5447,11 +5447,10 @@ public void TestOvaMacroWithUncalibratedLearner() 'RecencyGainMulti': false, 'Averaged': true, 'AveragedTolerance': 0.01, - 'NumIterations': 1, + 'NumberOfIterations': 1, 'InitialWeights': null, - 'InitWtsDiameter': 0.0, + 'InitialWeightsDiameter': 0.0, 'Shuffle': false, - 'StreamingCacheSize': 1000000, 'LabelColumn': 'Label', 'TrainingData': '$Var_9ccc8bce4f6540eb8a244ab40585602a', 'FeatureColumn': 'Features', From 59673b8f5e513768d4d3e83325334fa89f483da8 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 12 Feb 2019 15:21:15 -0800 Subject: [PATCH 11/14] Fixed breaking changes from master. 
For sample code, changed numIterations to 10 as per Justin's request --- .../AveragedPerceptron.cs | 21 ++++++++++--------- .../AveragedPerceptronWithOptions.cs | 6 +++--- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs index 35bdefa434..ee2e3fdd94 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -19,27 +19,28 @@ public static void Example() var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); // Create data training pipeline - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron("IsOver50K", "Features"); + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron( + "IsOver50K", "Features", numIterations: 10); // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(trainTestData.TrainSet); // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Output: - // Accuracy: 0.85 - // AUC: 0.90 - // F1 Score: 0.66 - // Negative Precision: 0.89 + // Accuracy: 0.86 + // AUC: 0.91 + // F1 Score: 0.68 + // Negative Precision: 0.90 // Negative Recall: 0.91 - // Positive Precision: 0.69 - // Positive Recall: 
0.63 + // Positive Precision: 0.70 + // Positive Recall: 0.66 } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs index eaf8066398..ac9296a96d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -20,7 +20,7 @@ public static void Example() var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing - var (trainData, testData) = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); // Define the trainer options var options = new AveragedPerceptronTrainer.Options() @@ -38,10 +38,10 @@ public static void Example() var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options); // Fit this pipeline to the training data - var model = pipeline.Fit(trainData); + var model = pipeline.Fit(trainTestData.TrainSet); // Evaluate how the model is doing on the test data - var dataWithPredictions = model.Transform(testData); + var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); From 3daf7bdeacdf834fe74f6af2a1cdb94552795bc9 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 13 Feb 2019 14:06:14 -0800 Subject: [PATCH 12/14] Addressed the PR comments --- .../AveragedPerceptron.cs | 38 ++++++++--------- .../AveragedPerceptronWithOptions.cs | 41 +++++++++---------- .../SamplesDatasetUtils.cs | 4 +- .../Standard/Online/AveragedLinear.cs | 5 ++- 
.../Standard/Online/AveragedPerceptron.cs | 7 +++- .../Standard/Online/OnlineLinear.cs | 13 ++++-- .../StandardLearnersCatalog.cs | 29 ++++++------- 7 files changed, 72 insertions(+), 65 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs index ee2e3fdd94..767d398dc6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -4,43 +4,41 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification { public static class AveragedPerceptron { + // In this example we will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { - // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, // as a catalog of available operations and as the source of randomness. // Setting the seed to a fixed number in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); - // Download and featurize the dataset + // Download and featurize the dataset. var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); - // Leave out 10% of data for testing + // Leave out 10% of data for testing.
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); - // Create data training pipeline - var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron( - "IsOver50K", "Features", numIterations: 10); + // Create data training pipeline. + var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(numIterations: 10); - // Fit this pipeline to the training data + // Fit this pipeline to the training data. var model = pipeline.Fit(trainTestData.TrainSet); - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(trainTestData.TestSet); - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); - // Output: - // Accuracy: 0.86 - // AUC: 0.91 - // F1 Score: 0.68 - // Negative Precision: 0.90 - // Negative Recall: 0.91 - // Positive Precision: 0.70 - // Positive Recall: 0.66 + // Expected output: + // Accuracy: 0.86 + // AUC: 0.91 + // F1 Score: 0.68 + // Negative Precision: 0.90 + // Negative Recall: 0.91 + // Positive Precision: 0.70 + // Positive Recall: 0.66 } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs index ac9296a96d..ee568bff92 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -5,54 +5,51 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification { public static class AveragedPerceptronWithOptions { + // In this example we
will use the adult income dataset. The goal is to predict + // if a person's income is above $50K or not, based on different pieces of information about that person. + // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. public static void Example() { - // In this examples we will use the adult income dataset. The goal is to predict - // if a person's income is above $50K or not, based on different pieces of information about that person. - // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, // as a catalog of available operations and as the source of randomness. // Setting the seed to a fixed number in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); - // Download and featurize the dataset + // Download and featurize the dataset. var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); - // Leave out 10% of data for testing + // Leave out 10% of data for testing. var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1); - // Define the trainer options + // Define the trainer options. var options = new AveragedPerceptronTrainer.Options() { LossFunction = new SmoothedHingeLoss.Arguments(), LearningRate = 0.1f, DoLazyUpdates = false, RecencyGain = 0.1f, - NumberOfIterations = 10, - LabelColumn = "IsOver50K", - FeatureColumn = "Features" + NumberOfIterations = 10 }; - // Create data training pipeline + // Create data training pipeline. var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options); - // Fit this pipeline to the training data + // Fit this pipeline to the training data. var model = pipeline.Fit(trainTestData.TrainSet); - // Evaluate how the model is doing on the test data + // Evaluate how the model is doing on the test data. 
var dataWithPredictions = model.Transform(trainTestData.TestSet); - var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K"); + var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); SamplesUtils.ConsoleUtils.PrintMetrics(metrics); - // Output: - // Accuracy: 0.86 - // AUC: 0.90 - // F1 Score: 0.66 - // Negative Precision: 0.89 - // Negative Recall: 0.93 - // Positive Precision: 0.72 - // Positive Recall: 0.61 + // Expected output: + // Accuracy: 0.86 + // AUC: 0.90 + // F1 Score: 0.66 + // Negative Precision: 0.89 + // Negative Recall: 0.93 + // Positive Precision: 0.72 + // Positive Recall: 0.61 } } } \ No newline at end of file diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 918cd26a9d..fb5bdcfe3e 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -117,9 +117,9 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) ); // Create data featurizing pipeline - var pipeline = + var pipeline = mlContext.Transforms.CopyColumns("Label", "IsOver50K") // Convert categorical features to one-hot vectors - mlContext.Transforms.Categorical.OneHotEncoding("workclass") + .Append(mlContext.Transforms.Categorical.OneHotEncoding("workclass")) .Append(mlContext.Transforms.Categorical.OneHotEncoding("education")) .Append(mlContext.Transforms.Categorical.OneHotEncoding("marital-status")) .Append(mlContext.Transforms.Categorical.OneHotEncoding("occupation")) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs index 9234cd2df4..f5919f8b47 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs @@ -15,6 +15,9 @@ namespace 
Microsoft.ML.Trainers.Online { + /// + /// Arguments class for averaged linear trainers. + /// public abstract class AveragedLinearArguments : OnlineLinearArguments { /// @@ -30,7 +33,7 @@ public abstract class AveragedLinearArguments : OnlineLinearArguments /// /// /// to decrease the as iterations progress; otherwise, . - /// Default is . + /// Default is . The learning rate will be reduced with every weight update proportional to the square root of the number of updates. /// [Argument(ArgumentType.AtMostOnce, HelpText = "Decrease learning rate", ShortName = "decreaselr", SortOrder = 50)] [TGUI(Label = "Decrease Learning Rate", Description = "Decrease learning rate as iterations progress")] diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index d3dbdf619e..29b2573ffe 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -25,7 +25,7 @@ namespace Microsoft.ML.Trainers.Online { /// - /// This is averaged perceptron trainer. + /// The for the averaged perceptron trainer. /// /// /// For usage details, please see @@ -39,10 +39,13 @@ public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer + /// Options for the averaged perceptron trainer. + /// public sealed class Options : AveragedLinearArguments { /// - /// The custom loss. + /// A custom loss. 
/// [Argument(ArgumentType.Multiple, HelpText = "Loss Function", ShortName = "loss", SortOrder = 50)] public ISupportClassificationLossFactory LossFunction = new HingeLoss.Arguments(); diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs index 4dbdc8da57..4790700a0d 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLinear.cs @@ -16,13 +16,15 @@ namespace Microsoft.ML.Trainers.Online { - + /// + /// Arguments class for online linear trainers. + /// public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel { /// - /// Number of training iterations through the data. + /// Number of passes through the training dataset. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter", SortOrder = 50)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations", ShortName = "iter, numIterations", SortOrder = 50)] [TGUI(Label = "Number of Iterations", Description = "Number of training iterations through data", SuggestedSweeps = "1,10,100")] [TlcModule.SweepableLongParamAttribute("NumIterations", 1, 100, stepSize: 10, isLogScale: true)] public int NumberOfIterations = OnlineDefaultArgs.NumIterations; @@ -47,9 +49,12 @@ public abstract class OnlineLinearArguments : LearnerInputBaseWithLabel public float InitialWeightsDiameter = 0; /// + /// Determines whether to shuffle data for each training iteration. + /// + /// /// to shuffle data for each training iteration; otherwise, . /// Default is . 
- /// + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf")] [TlcModule.SweepableDiscreteParamAttribute("Shuffle", new object[] { false, true })] public bool Shuffle = true; diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index d78464acde..b12340f4f1 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -190,23 +190,22 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla } /// - /// Predict a target using a linear binary classification model trained with averaged perceptron trainer. + /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer. /// /// - /// Perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. + /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into. /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. /// /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time. /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed. 
- /// If this value has the same sign as the label of the current example, the weights remain the same.If they have opposite signs, - /// the weights vector is updated by either subtracting or adding (if the label is negative or positive, respectively) the feature vector of the current example, - /// multiplied by a factor 0 < a <= 1, called the learning rate.In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, + /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, + /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example, + /// multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). /// - /// In Averaged Perceptron (AKA voted-perceptron), the weight vectors are stored, - /// together with a weight that counts the number of iterations it survived (this is equivalent to storing the weight vector after every iteration, regardless of whether it was updated or not). - /// The prediction is then calculated by taking the weighted average of all the sums sigma[0, D-1] (w_i * f_i) or the different weight vectors. + /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above. + /// The final prediction is then calculate by averaging the weighted sum from each weight vector and looking at the sign of the result. 
/// /// For more information see Wikipedia entry for Perceptron /// or Large Margin Classification Using the Perceptron Algorithm @@ -214,7 +213,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. - /// The custom loss. If , hinge loss will be used resulting in max-margin averaged perceptron. + /// A custom loss. If , hinge loss will be used resulting in max-margin averaged perceptron. /// The optional example weights. /// Learning rate. /// @@ -222,11 +221,11 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla /// Default is . /// /// L2 weight for regularization. - /// Number of training iterations through the data. + /// Number of passes through the training dataset. /// /// /// /// /// @@ -248,15 +247,17 @@ public static AveragedPerceptronTrainer AveragedPerceptron( } /// - /// Predict a target using a linear binary classification model trained with averaged perceptron trainer using advanced options. - /// For usage details, please see + /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer using advanced options. /// + /// + /// For usage details, please see + /// /// The binary classification catalog trainer object. /// Trainer options. /// /// /// /// /// From b10d7e6aa2c5eac720c508055f976ae83947da49 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 13 Feb 2019 14:14:37 -0800 Subject: [PATCH 13/14] Moved algorithm remarks to the estimator class. 
--- .../Standard/Online/AveragedPerceptron.cs | 17 ++++++++++++- .../StandardLearnersCatalog.cs | 25 ++----------------- .../Common/EntryPoints/core_manifest.json | 9 ++++--- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 29b2573ffe..e7348f096b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -28,7 +28,22 @@ namespace Microsoft.ML.Trainers.Online /// The for the averaged perceptron trainer. /// /// - /// For usage details, please see + /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. + /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into. + /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. + /// + /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time. + /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed. + /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, + /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example, + /// multiplied by a factor 0 < a <= 1, called the learning rate. 
In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, + /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). + /// + /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above. + /// The final prediction is then calculated by averaging the weighted sum from each weight vector and looking at the sign of the result. + /// + /// For more information see Wikipedia entry for Perceptron + /// or Large Margin Classification Using the Perceptron Algorithm /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index b12340f4f1..a59ef319c9 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -190,26 +190,8 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla } /// - /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer. + /// Predict a target using a linear binary classification model trained with . /// - /// - /// The perceptron is a classification algorithm that makes its predictions by finding a separating hyperplane. - /// For instance, with feature values f0, f1,..., f_D-1, the prediction is given by determining what side of the hyperplane the point falls into. - /// That is the same as the sign of sigma[0, D-1] (w_i * f_i), where w_0, w_1,..., w_D-1 are the weights computed by the algorithm. - /// - /// The perceptron is an online algorithm, which means it processes the instances in the training set one at a time.
- /// It starts with a set of initial weights (zero, random, or initialized from a previous learner). Then, for each example in the training set, the weighted sum of the features (sigma[0, D-1] (w_i * f_i)) is computed. - /// If this value has the same sign as the label of the current example, the weights remain the same. If they have opposite signs, - /// the weights vector is updated by either adding or subtracting (if the label is positive or negative, respectively) the feature vector of the current example, - /// multiplied by a factor 0 < a <= 1, called the learning rate. In a generalization of this algorithm, the weights are updated by adding the feature vector multiplied by the learning rate, - /// and by the gradient of some loss function (in the specific case described above, the loss is hinge-loss, whose gradient is 1 when it is non-zero). - /// - /// In Averaged Perceptron (aka voted-perceptron), for each iteration, i.e. pass through the training data, a weight vector is calculated as explained above. - /// The final prediction is then calculate by averaging the weighted sum from each weight vector and looking at the sign of the result. - /// - /// For more information see Wikipedia entry for Perceptron - /// or Large Margin Classification Using the Perceptron Algorithm - /// /// The binary classification catalog trainer object. /// The name of the label column, or dependent variable. /// The features, or independent variables. @@ -247,11 +229,8 @@ public static AveragedPerceptronTrainer AveragedPerceptron( } /// - /// Predict a target using a linear binary classification model trained with the averaged perceptron trainer using advanced options. + /// Predict a target using a linear binary classification model trained with and advanced options. /// - /// - /// For usage details, please see - /// /// The binary classification catalog trainer object. /// Trainer options. 
/// diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 5d720ce6de..a6e3b29dd9 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4310,7 +4310,8 @@ "Type": "Int", "Desc": "Number of iterations", "Aliases": [ - "iter" + "iter", + "numIterations" ], "Required": false, "SortOrder": 50.0, @@ -13240,7 +13241,8 @@ "Type": "Int", "Desc": "Number of iterations", "Aliases": [ - "iter" + "iter", + "numIterations" ], "Required": false, "SortOrder": 50.0, @@ -14254,7 +14256,8 @@ "Type": "Int", "Desc": "Number of iterations", "Aliases": [ - "iter" + "iter", + "numIterations" ], "Required": false, "SortOrder": 50.0, From 056a88792e4633699ee1c4d59477c0455d8276a3 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Thu, 14 Feb 2019 12:30:49 -0800 Subject: [PATCH 14/14] Added doc xml docs --- src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 1ce4d5810d..203bd6e6bd 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -87,6 +87,14 @@ public static string DownloadSentimentDataset() public static string DownloadAdultDataset() => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + /// <summary> + /// Downloads the Adult UCI dataset and featurizes it to be suitable for classification tasks. + /// </summary> + /// <param name="mlContext"><see cref="MLContext"/> used for data loading and processing.</param> + /// <returns>Featurized dataset.</returns> + /// <remarks> + /// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult. + /// </remarks> public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) { // Download the file