From 19d25e2e87cdc0d1ba66888afed94c8af56094a2 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Mon, 25 Feb 2019 16:55:12 -0800 Subject: [PATCH 1/7] Scrubbing FieldAwareFactorizationMachine learner. --- .../Dynamic/Calibrator.cs | 2 +- .../Dynamic/FieldAwareFactorizationMachine.cs | 71 ---------------- .../FieldAwareFactorizationMachine.cs | 71 ++++++++++++++++ ...ldAwareFactorizationMachinewWithOptions.cs | 80 +++++++++++++++++++ .../SDCALogisticRegression.cs | 2 +- .../Dynamic/Trainers/PriorTrainerSample.cs | 2 +- .../Dynamic/Trainers/RandomTrainerSample.cs | 2 +- .../Microsoft.ML.Samples.csproj | 1 + .../SamplesDatasetUtils.cs | 44 ++++++++-- .../FactorizationMachineCatalog.cs | 8 +- .../FactorizationMachineTrainer.cs | 8 +- 11 files changed, 206 insertions(+), 85 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs index dc52d0eed4..b8d7bc9f6e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs @@ -16,7 +16,7 @@ public static void Example() // This will create a sentiment.tsv file in the filesystem. // The string, dataFile, is the path to the downloaded file. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs deleted file mode 100644 index e9c54e2572..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs +++ /dev/null @@ -1,71 +0,0 @@ -using System; -using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic -{ - public static class FFMBinaryClassification - { - public static void Example() - { - // Downloading the dataset from github.com/dotnet/machinelearning. - // This will create a sentiment.tsv file in the filesystem. - // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); - - // A preview of the data. - // Sentiment SentimentText - // 0 " :Erm, thank you. " - // 1 ==You're cool== - - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(); - - // Step 1: Read the data as an IDataView. - // First, we define the reader: specify the data columns and where to find them in the text file. - var reader = mlContext.Data.CreateTextLoader( - columns: new[] - { - new TextLoader.Column("Sentiment", DataKind.BL, 0), - new TextLoader.Column("SentimentText", DataKind.Text, 1) - }, - hasHeader: true - ); - - // Read the data - var data = reader.Read(dataFile); - - // ML.NET doesn't cache data set by default. 
Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to - // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially - // helpful when working with iterative algorithms which needs many data passes. Since SDCA is the case, we cache. Inserting a - // cache step in a pipeline is also possible, please see the construction of pipeline below. - data = mlContext.Data.Cache(data); - - // Step 2: Pipeline - // Featurize the text column through the FeaturizeText API. - // Then append a binary classifier, setting the "Label" column as the label of the dataset, and - // the "Features" column produced by FeaturizeText as the features column. - var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") - .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. - .Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); - - // Fit the model. - var model = pipeline.Fit(data); - - // Let's get the model parameters from the model. - var modelParams = model.LastTransformer.Model; - - // Let's inspect the model parameters. - var featureCount = modelParams.GetFeatureCount(); - var fieldCount = modelParams.GetFieldCount(); - var latentDim = modelParams.GetLatentDim(); - var linearWeights = modelParams.GetLinearWeights(); - var latentWeights = modelParams.GetLatentWeights(); - - Console.WriteLine("The feature count is: " + featureCount); - Console.WriteLine("The number of fields is: " + fieldCount); - Console.WriteLine("The latent dimension is: " + latentDim); - Console.WriteLine("The lineear weights of the features are: " + string.Join(", ", linearWeights)); - Console.WriteLine("The weights of the latent features are: " + string.Join(", ", latentWeights)); - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs new file mode 100644 index 0000000000..7e72437741 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -0,0 +1,71 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +namespace Microsoft.ML.Samples.Dynamic +{ + public static class FFMBinaryClassification + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + (var trainData, var testData) = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + + // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to + // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially + // helpful when working with iterative algorithms which needs many data passes. Since SDCA is the case, we cache. Inserting a + // cache step in a pipeline is also possible, please see the construction of pipeline below. 
+ trainData = mlContext.Data.Cache(trainData); + + // Step 2: Pipeline + // Create the 'FieldAwareFactorizationMachine' binary classifier, setting the "Sentiment" column as the label of the dataset, and + // the "Features" column as the features column. + var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) + .Append(mlContext.BinaryClassification.Trainers. + FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); + + // Fit the model. + var model = pipeline.Fit(trainData); + + // Let's get the model parameters from the model. + var modelParams = model.LastTransformer.Model; + + // Let's inspect the model parameters. + var featureCount = modelParams.GetFeatureCount(); + var fieldCount = modelParams.GetFieldCount(); + var latentDim = modelParams.GetLatentDim(); + var linearWeights = modelParams.GetLinearWeights(); + var latentWeights = modelParams.GetLatentWeights(); + + Console.WriteLine("The feature count is: " + featureCount); + Console.WriteLine("The number of fields is: " + fieldCount); + Console.WriteLine("The latent dimension is: " + latentDim); + Console.WriteLine("The linear weights of some of the features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); + Console.WriteLine("The weights of some of the latent features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + + // The feature count is: 9374 + // The number of fields is: 1 + // The latent dimension is: 20 + // The linear weights of some of the features are: 0.0196 0.0000 -0.0045 -0.0205 0.0000 0.0032 0.0682 0.0091 -0.0151 0.0089 + // The weights of some of the latent features are: 0.3316 0.2140 0.0752 0.0908 -0.0495 -0.0810 0.0761 0.0966 0.0090 -0.0962 + + // Evaluate how the model is doing on the test data. + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Accuracy: 0.72 + // AUC: 0.75 + // F1 Score: 0.74 + // Negative Precision: 0.75 + // Negative Recall: 0.67 + // Positive Precision: 0.70 + // Positive Recall: 0.78 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs new file mode 100644 index 0000000000..dc2eeeb78c --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs @@ -0,0 +1,80 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.FactorizationMachine; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class FFMBinaryClassificationWithOptions + { + public static void Example() + { + // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + (var trainData, var testData) = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + + // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to + // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. 
Caching is especially + // helpful when working with iterative algorithms which needs many data passes. Since SDCA is the case, we cache. Inserting a + // cache step in a pipeline is also possible, please see the construction of pipeline below. + trainData = mlContext.Data.Cache(trainData); + + // Step 2: Pipeline + // Create the 'FieldAwareFactorizationMachine' binary classifier, setting the "Sentiment" column as the label of the dataset, and + // the "Features" column as the features column. + var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) + .Append(mlContext.BinaryClassification.Trainers. + FieldAwareFactorizationMachine( + new FieldAwareFactorizationMachineTrainer.Options + { + FeatureColumn = "Features", + LabelColumn = "Sentiment", + LearningRate = 0.1f, + Iters = 10 + })); + + // Fit the model. + var model = pipeline.Fit(trainData); + + // Let's get the model parameters from the model. + var modelParams = model.LastTransformer.Model; + + // Let's inspect the model parameters. + var featureCount = modelParams.GetFeatureCount(); + var fieldCount = modelParams.GetFieldCount(); + var latentDim = modelParams.GetLatentDim(); + var linearWeights = modelParams.GetLinearWeights(); + var latentWeights = modelParams.GetLatentWeights(); + + Console.WriteLine("The feature count is: " + featureCount); + Console.WriteLine("The number of fields is: " + fieldCount); + Console.WriteLine("The latent dimension is: " + latentDim); + Console.WriteLine("The linear weights of some of the features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); + Console.WriteLine("The weights of some of the latent features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + + // The feature count is: 9374 + // The number of fields is: 1 + // The latent dimension is: 20 + // The linear weights of some of the features are: 0.0410 0.0000 -0.0078 -0.0285 0.0000 0.0114 0.1313 0.0183 -0.0224 0.0166 + // The weights of some of the latent features are: -0.0326 0.1127 0.0621 0.1446 0.2038 0.1608 0.2084 0.0141 0.2458 -0.0625 + + // Evaluate how the model is doing on the test data. + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Accuracy: 0.78 + // AUC: 0.81 + // F1 Score: 0.78 + // Negative Precision: 0.78 + // Negative Recall: 0.78 + // Positive Precision: 0.78 + // Positive Recall: 0.78 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs index 979976cc01..dfc57824e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs @@ -12,7 +12,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + (var dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); // A preview of the data. 
// Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs index 55aa9793c5..1a41f18a6e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs index ce68f88950..d821b63be1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index aebf45c592..63f92ff47d 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -22,6 +22,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 203bd6e6bd..da988be472 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -78,14 +78,48 @@ public sealed class HousingRegression /// /// Downloads the wikipedia detox dataset from the ML.NET repo. /// - public static string DownloadSentimentDataset() - => Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-data.tsv", "sentiment.tsv"); + public static (string trainFile, string testFile) DownloadSentimentDataset() + { + var trainFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-data.tsv", "sentiment.tsv"); + var testFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-test.tsv", "sentimenttest.tsv"); + return (trainFile, testFile); + } + + /// + /// Downloads the adult dataset from the ML.NET repo. + /// + public static string DownloadAdultDataset() + => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); /// - /// Downloads the adult dataset from the ML.NET repo. + /// Downloads the wikipedia detox dataset and featurizes it to be suitable for sentiment classification tasks. 
/// - public static string DownloadAdultDataset() - => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + /// used for data loading and processing. + /// Featurized dataset. + public static (IDataView trainData, IDataView testData) LoadFeaturizedSentimentDataset(MLContext mlContext) + { + // Download the file + (string trainFile, string testFile) = DownloadSentimentDataset(); + + // Define the columns to read + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Sentiment", DataKind.BL, 0), + new TextLoader.Column("SentimentText", DataKind.Text, 1) + }, + hasHeader: true + ); + + // Create data featurizing pipeline + var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText"); + + var data = reader.Read(trainFile); + var model = pipeline.Fit(data); + var featurizedDataTrain = model.Transform(data); + var featurizedDataTest = model.Transform(reader.Read(testFile)); + return (featurizedDataTrain, featurizedDataTest); + } /// /// Downloads the Adult UCI dataset and featurizes it to be suitable for classification tasks. diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs index eb717898dd..6177cb4b43 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs @@ -23,7 +23,7 @@ public static class FactorizationMachineExtensions /// /// /// /// public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, @@ -41,6 +41,12 @@ public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachi /// /// The binary classification catalog trainer object. /// Advanced arguments to the algorithm. + /// + /// + /// + /// public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, FieldAwareFactorizationMachineTrainer.Options options) { diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index aef265f8d7..395c868c72 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.FactorizationMachine { /* Train a field-aware factorization machine using ADAGRAD (an advanced stochastic gradient method). See references below - for details. This trainer is essentially faster the one introduced in [2] because of some implementation tricks[3]. + for details. This trainer is essentially faster than the one introduced in [2] because of some implementation tricks[3]. [1] http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf [2] https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf @@ -156,12 +156,12 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options opt /// The private instance of . /// The name of column hosting the features. The i-th element stores feature column of the i-th field. /// The name of the label column. 
- /// The name of the optional weights' column. + /// The name of the optional weights' column. [BestFriend] internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, string[] featureColumns, string labelColumn = DefaultColumnNames.Label, - string weights = null) + string weightColumn = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(LoadName); @@ -176,7 +176,7 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, FeatureColumns[i] = new SchemaShape.Column(featureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); LabelColumn = new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false); - WeightColumn = weights != null ? new SchemaShape.Column(weights, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; + WeightColumn = weightColumn != null ? new SchemaShape.Column(weightColumn, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; } /// From b89c63b8839f40db47b136208720501d3ae388d0 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 26 Feb 2019 12:45:48 -0800 Subject: [PATCH 2/7] Addressed reviewers' comments. --- .../Dynamic/Calibrator.cs | 2 +- .../FieldAwareFactorizationMachine.cs | 15 ++-- ...ldAwareFactorizationMachinewWithOptions.cs | 29 +++---- .../SDCALogisticRegression.cs | 2 +- .../Dynamic/Trainers/PriorTrainerSample.cs | 2 +- .../Dynamic/Trainers/RandomTrainerSample.cs | 2 +- .../SamplesDatasetUtils.cs | 18 ++--- .../FactorizationMachineTrainer.cs | 69 +++++++++++------ ...wareFactorizationMachineModelParameters.cs | 75 ++++++++++--------- .../TrainerEstimators/FAFMEstimator.cs | 4 +- 10 files changed, 127 insertions(+), 91 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs index b8d7bc9f6e..eff0c906dd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs @@ -16,7 +16,7 @@ public static void Example() // This will create a sentiment.tsv file in the filesystem. // The string, dataFile, is the path to the downloaded file. // You can open this file, if you want to see the data. - (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs index 7e72437741..55ec4d2349 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -7,11 +7,14 @@ public static class FFMBinaryClassification { public static void Example() { - // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. var mlContext = new MLContext(); // Download and featurize the dataset. 
- (var trainData, var testData) = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var trainData = dataviews[0]; + var testData = dataviews[1]; // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially @@ -24,7 +27,7 @@ public static void Example() // the "Features" column as the features column. var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) .Append(mlContext.BinaryClassification.Trainers. - FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); + FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); // Fit the model. var model = pipeline.Fit(trainData); @@ -33,9 +36,9 @@ public static void Example() var modelParams = model.LastTransformer.Model; // Let's inspect the model parameters. - var featureCount = modelParams.GetFeatureCount(); - var fieldCount = modelParams.GetFieldCount(); - var latentDim = modelParams.GetLatentDim(); + var featureCount = modelParams.FeatureCount; + var fieldCount = modelParams.FieldCount; + var latentDim = modelParams.LatentDimension; var linearWeights = modelParams.GetLinearWeights(); var latentWeights = modelParams.GetLatentWeights(); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs index dc2eeeb78c..1a8d2eb27e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs @@ -9,11 +9,14 @@ public static class FFMBinaryClassificationWithOptions { public static void Example() { - // Creating the ML.Net IHostEnvironment object, needed for the pipeline. + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. var mlContext = new MLContext(); // Download and featurize the dataset. - (var trainData, var testData) = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var trainData = dataviews[0]; + var testData = dataviews[1]; // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially @@ -26,14 +29,14 @@ public static void Example() // the "Features" column as the features column. var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) .Append(mlContext.BinaryClassification.Trainers. 
- FieldAwareFactorizationMachine( - new FieldAwareFactorizationMachineTrainer.Options - { - FeatureColumn = "Features", - LabelColumn = "Sentiment", - LearningRate = 0.1f, - Iters = 10 - })); + FieldAwareFactorizationMachine( + new FieldAwareFactorizationMachineTrainer.Options + { + FeatureColumn = "Features", + LabelColumn = "Sentiment", + LearningRate = 0.1f, + Iterations = 10 + })); // Fit the model. var model = pipeline.Fit(trainData); @@ -42,9 +45,9 @@ public static void Example() var modelParams = model.LastTransformer.Model; // Let's inspect the model parameters. - var featureCount = modelParams.GetFeatureCount(); - var fieldCount = modelParams.GetFieldCount(); - var latentDim = modelParams.GetLatentDim(); + var featureCount = modelParams.FeatureCount; + var fieldCount = modelParams.FieldCount; + var latentDim = modelParams.LatentDimension; var linearWeights = modelParams.GetLinearWeights(); var latentWeights = modelParams.GetLatentWeights(); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs index dfc57824e1..4a626dcb65 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs @@ -12,7 +12,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - (var dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs index 1a41f18a6e..df7c20e113 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs index d821b63be1..11e83b4882 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - (string dataFile, _ ) = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. 
// Sentiment SentimentText diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index da988be472..799382b230 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -78,11 +78,11 @@ public sealed class HousingRegression /// /// Downloads the wikipedia detox dataset from the ML.NET repo. /// - public static (string trainFile, string testFile) DownloadSentimentDataset() + public static string[] DownloadSentimentDataset() { var trainFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-data.tsv", "sentiment.tsv"); var testFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-test.tsv", "sentimenttest.tsv"); - return (trainFile, testFile); + return new[] { trainFile, testFile }; } /// @@ -95,11 +95,11 @@ public static string DownloadAdultDataset() /// Downloads the wikipedia detox dataset and featurizes it to be suitable for sentiment classification tasks. /// /// used for data loading and processing. - /// Featurized dataset. - public static (IDataView trainData, IDataView testData) LoadFeaturizedSentimentDataset(MLContext mlContext) + /// Featurized train and test dataset. + public static IDataView[] LoadFeaturizedSentimentDataset(MLContext mlContext) { - // Download the file - (string trainFile, string testFile) = DownloadSentimentDataset(); + // Download the files + var dataFiles = DownloadSentimentDataset(); // Define the columns to read var reader = mlContext.Data.CreateTextLoader( @@ -114,11 +114,11 @@ public static (IDataView trainData, IDataView testData) LoadFeaturizedSentimentD // Create data featurizing pipeline var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText"); - var data = reader.Read(trainFile); + var data = reader.Read(dataFiles[0]); var model = pipeline.Fit(data); var featurizedDataTrain = model.Transform(data); - var featurizedDataTest = model.Transform(reader.Read(testFile)); - return (featurizedDataTrain, featurizedDataTest); + var featurizedDataTest = model.Transform(reader.Read(dataFiles[1])); + return new[] { featurizedDataTrain, featurizedDataTest }; } /// diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 395c868c72..61314e309e 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.FactorizationMachine { /* Train a field-aware factorization machine using ADAGRAD (an advanced stochastic gradient method). See references below - for details. This trainer is essentially faster than the one introduced in [2] because of some implementation tricks[3]. + for details. This trainer is essentially faster than the one introduced in [2] because of some implementation tricks in [3]. [1] http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf [2] https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf @@ -42,28 +42,46 @@ public sealed class FieldAwareFactorizationMachineTrainer : ITrainer + /// Initial learning rate. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate", ShortName = "lr", SortOrder = 1)] [TlcModule.SweepableFloatParam(0.001f, 1.0f, isLogScale: true)] public float LearningRate = (float)0.1; + /// + /// Number of training iterations. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iter", SortOrder = 2)] [TlcModule.SweepableLongParam(1, 100)] - public int Iters = 5; + public int Iterations = 5; + /// + /// Latent space dimension. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Latent space dimension", ShortName = "d", SortOrder = 3)] [TlcModule.SweepableLongParam(4, 100)] - public int LatentDim = 20; + public int LatentDimension = 20; + /// + /// Regularization coefficient of linear weights. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization coefficient of linear weights", ShortName = "lambdaLinear", SortOrder = 4)] [TlcModule.SweepableFloatParam(1e-8f, 1f, isLogScale: true)] public float LambdaLinear = 0.0001f; + /// + /// Regularization coefficient of latent weights. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization coefficient of latent weights", ShortName = "lambdaLatent", SortOrder = 5)] [TlcModule.SweepableFloatParam(1e-8f, 1f, isLogScale: true)] public float LambdaLatent = 0.0001f; + /// + /// Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", ShortName = "norm", SortOrder = 6)] - public bool Norm = true; + public bool Normalize = true; /// /// Extra feature column names. The column named stores features from the first field. @@ -74,12 +92,21 @@ public sealed class Options : LearnerInputBaseWithWeight ShortName = "exfeat", SortOrder = 7)] public string[] ExtraFeatureColumns; + /// + /// Whether to shuffle for each training iteration. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf", SortOrder = 90)] public bool Shuffle = true; + /// + /// Report traning progress or not. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Report traning progress or not", ShortName = "verbose", SortOrder = 91)] public bool Verbose = true; + /// + /// Radius of initial latent factors. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Radius of initial latent factors", ShortName = "rad", SortOrder = 110)] [TlcModule.SweepableFloatParam(0.1f, 1f)] public float Radius = 0.5f; @@ -154,14 +181,14 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options opt /// Initializes a new instance of . /// /// The private instance of . - /// The name of column hosting the features. The i-th element stores feature column of the i-th field. - /// The name of the label column. - /// The name of the optional weights' column. + /// The name of column hosting the features. The i-th element stores feature column of the i-th field. + /// The name of the label column. + /// The name of the weight column (optional). 
[BestFriend] internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, - string[] featureColumns, - string labelColumn = DefaultColumnNames.Label, - string weightColumn = null) + string[] featureColumnNames, + string labelColumnName = DefaultColumnNames.Label, + string weightColumnName = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(LoadName); @@ -170,13 +197,13 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Initialize(env, args); - FeatureColumns = new SchemaShape.Column[featureColumns.Length]; + FeatureColumns = new SchemaShape.Column[featureColumnNames.Length]; - for (int i = 0; i < featureColumns.Length; i++) - FeatureColumns[i] = new SchemaShape.Column(featureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); + for (int i = 0; i < featureColumnNames.Length; i++) + FeatureColumns[i] = new SchemaShape.Column(featureColumnNames[i], SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); - LabelColumn = new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false); - WeightColumn = weightColumn != null ? new SchemaShape.Column(weightColumn, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; + LabelColumn = new SchemaShape.Column(labelColumnName, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false); + WeightColumn = weightColumnName != null ? new SchemaShape.Column(weightColumnName, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; } /// @@ -187,18 +214,18 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, /// private void Initialize(IHostEnvironment env, Options options) { - _host.CheckUserArg(options.LatentDim > 0, nameof(options.LatentDim), "Must be positive"); + _host.CheckUserArg(options.LatentDimension > 0, nameof(options.LatentDimension), "Must be positive"); _host.CheckUserArg(options.LambdaLinear >= 0, nameof(options.LambdaLinear), "Must be non-negative"); _host.CheckUserArg(options.LambdaLatent >= 0, nameof(options.LambdaLatent), "Must be non-negative"); _host.CheckUserArg(options.LearningRate > 0, nameof(options.LearningRate), "Must be positive"); - _host.CheckUserArg(options.Iters >= 0, nameof(options.Iters), "Must be non-negative"); - _latentDim = options.LatentDim; + _host.CheckUserArg(options.Iterations >= 0, nameof(options.Iterations), "Must be non-negative"); + _latentDim = options.LatentDimension; _latentDimAligned = FieldAwareFactorizationMachineUtils.GetAlignedVectorLength(_latentDim); _lambdaLinear = options.LambdaLinear; _lambdaLatent = options.LambdaLatent; _learningRate = options.LearningRate; - _numIterations = options.Iters; - _norm = options.Norm; + _numIterations = options.Iterations; + _norm = options.Normalize; _shuffle = options.Shuffle; _verbose = options.Verbose; _radius = options.Radius; @@ -342,7 +369,7 @@ private FieldAwareFactorizationMachineModelParameters TrainCore(IChannel ch, IPr if (predictor != null) { ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count"); - ch.Check(predictor.LatentDim == _latentDim, "Input model's latent dimension mismatches trainer's"); + ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's"); } if (validData != null) { diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs 
b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs index 78000c948d..f1c63c1b42 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs @@ -26,9 +26,22 @@ public sealed class FieldAwareFactorizationMachineModelParameters : ModelParamet internal const string LoaderSignature = "FieldAwareFactMacPredict"; private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification; private bool _norm; - internal int FieldCount { get; } - internal int FeatureCount { get; } - internal int LatentDim { get; } + + /// + /// Get the number of fields. It's the symbol `m` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int FieldCount { get; } + + /// + /// Get the number of features. It's the symbol `n` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int FeatureCount { get; } + + /// + /// Get the latent dimension. It's the length of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int LatentDimension { get; } + internal int LatentDimAligned { get; } private readonly float[] _linearWeights; private readonly AlignedArray _latentWeightsAligned; @@ -54,7 +67,7 @@ private static VersionInfo GetVersionInfo() /// The latent dimensions, which is the length of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// The linear coefficients of the features, which is the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// Latent representation of each feature. Note that one feature may have latent vectors - /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc + /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc /// https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf, starts at latentWeights[j * fieldCount * latentDim + f * latentDim]. /// The k-th element in v_{j, f} is latentWeights[j * fieldCount * latentDim + f * latentDim + k]. The size of the array must be featureCount x fieldCount x latentDim.
internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, bool norm, int fieldCount, int featureCount, int latentDim, @@ -70,7 +83,7 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = new AlignedArray(FeatureCount * FieldCount * LatentDimAligned, 16); @@ -79,11 +92,11 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo { for (int f = 0; f < FieldCount; f++) { - int index = j * FieldCount * LatentDim + f * LatentDim; + int index = j * FieldCount * LatentDimension + f * LatentDimension; int indexAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; for (int k = 0; k < LatentDimAligned; k++) { - if (k < LatentDim) + if (k < LatentDimension) _latentWeightsAligned[indexAligned + k] = latentWeights[index + k]; else _latentWeightsAligned[indexAligned + k] = 0; @@ -105,7 +118,7 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = latentWeightsAligned; } @@ -139,18 +152,18 @@ private FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, Mode _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = new AlignedArray(FeatureCount * FieldCount * LatentDimAligned, 16); for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int vBias = j * FieldCount * LatentDim + f * LatentDim; + int vBias = j * FieldCount * LatentDimension + f * LatentDimension; int vBiasAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; for (int k = 0; k < LatentDimAligned; k++) { - if (k < LatentDim) + if (k < LatentDimension) _latentWeightsAligned[vBiasAligned + k] = latentWeights[vBias + k]; else _latentWeightsAligned[vBiasAligned + k] = 0; @@ -185,23 +198,23 @@ private protected override void SaveCore(ModelSaveContext ctx) Host.Assert(FieldCount > 0); Host.Assert(FeatureCount > 0); - Host.Assert(LatentDim > 0); + Host.Assert(LatentDimension > 0); Host.Assert(Utils.Size(_linearWeights) == FeatureCount); Host.Assert(_latentWeightsAligned.Size == FeatureCount * FieldCount * LatentDimAligned); ctx.Writer.Write(_norm); ctx.Writer.Write(FieldCount); ctx.Writer.Write(FeatureCount); - ctx.Writer.Write(LatentDim); + ctx.Writer.Write(LatentDimension); ctx.Writer.WriteSingleArray(_linearWeights); - float[] latentWeights = new float[FeatureCount * FieldCount * LatentDim]; + float[] latentWeights = new float[FeatureCount * FieldCount * LatentDimension]; for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int vBias = j * FieldCount * LatentDim + f * LatentDim; + int vBias = j * FieldCount * LatentDimension + f * LatentDimension; int vBiasAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; - for (int k = 0; k < LatentDim; k++) + for (int k = 0; k < LatentDimension; k++) latentWeights[vBias + k] = _latentWeightsAligned[vBiasAligned + k]; } } @@ -237,43 +250,33 @@ internal void CopyLatentWeightsTo(AlignedArray latentWeights) latentWeights.CopyFrom(_latentWeightsAligned); } - /// - /// Get the number of fields. 
It's the symbol `m` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetFieldCount() => FieldCount; - - /// - /// Get the number of features. It's the symbol `n` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetFeatureCount() => FeatureCount; - - /// - /// Get the latent dimension. It's the tlngth of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetLatentDim() => LatentDim; - /// /// The linear coefficients of the features. It's the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// - public float[] GetLinearWeights() => _linearWeights; + public float[] GetLinearWeights() + { + var linearWeights = new float[_linearWeights.Length]; + CopyLinearWeightsTo(linearWeights); + return linearWeights; + } /// /// Latent representation of each feature. Note that one feature may have latent vectors - /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc + /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc /// https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf, starts at latentWeights[j * fieldCount * latentDim + f * latentDim]. /// The k-th element in v_{j, f} is latentWeights[j * fieldCount * latentDim + f * latentDim + k]. /// The size of the returned value is featureCount x fieldCount x latentDim. /// public float[] GetLatentWeights() { - var latentWeights = new float[FeatureCount * FieldCount * LatentDim]; + var latentWeights = new float[FeatureCount * FieldCount * LatentDimension]; for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int index = j * FieldCount * LatentDim + f * LatentDim; + int index = j * FieldCount * LatentDimension + f * LatentDimension; int indexAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; - for (int k = 0; k < LatentDim; k++) + for (int k = 0; k < LatentDimension; k++) { latentWeights[index + k] = _latentWeightsAligned[indexAligned + k]; } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs index 359e44b7e1..f9e0bce066 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs @@ -49,8 +49,8 @@ public void FieldAwareFactorizationMachine_Estimator() FeatureColumn = "Feature1", // Features from the 1st field. ExtraFeatureColumns = new[] { "Feature2", "Feature3", "Feature4" }, // 2nd field's feature column, 3rd field's feature column, 4th field's feature column. Shuffle = false, - Iters = 3, - LatentDim = 7, + Iterations = 3, + LatentDimension = 7, }; var est = ML.BinaryClassification.Trainers.FieldAwareFactorizationMachine(ffmArgs); From 0bc0c92a703606631169a1656e15e17f05df8446 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 26 Feb 2019 12:59:02 -0800 Subject: [PATCH 3/7] Fixed entrypoint json. 
--- test/BaselineOutput/Common/EntryPoints/core_manifest.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index b36ebd329f..0d5c2856e9 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -10119,7 +10119,7 @@ "IsNullable": false }, { - "Name": "Iters", + "Name": "Iterations", "Type": "Int", "Desc": "Number of training iterations", "Aliases": [ @@ -10148,7 +10148,7 @@ "Default": "Features" }, { - "Name": "LatentDim", + "Name": "LatentDimension", "Type": "Int", "Desc": "Latent space dimension", "Aliases": [ @@ -10245,7 +10245,7 @@ "Default": "Auto" }, { - "Name": "Norm", + "Name": "Normalize", "Type": "Bool", "Desc": "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", "Aliases": [ From 4051d6b5fe7e97a95da0ac83998753f7d7f1ce7d Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 26 Feb 2019 12:59:02 -0800 Subject: [PATCH 4/7] Fixed entrypoint json. --- .../FactorizationMachine/FactorizationMachineTrainer.cs | 2 +- test/BaselineOutput/Common/EntryPoints/core_manifest.json | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 61314e309e..e52e7ad265 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -52,7 +52,7 @@ public sealed class Options : LearnerInputBaseWithWeight /// /// Number of training iterations. /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iter", SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iters", SortOrder = 2)] [TlcModule.SweepableLongParam(1, 100)] public int Iterations = 5; diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index b36ebd329f..0d46829792 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -10119,11 +10119,11 @@ "IsNullable": false }, { - "Name": "Iters", + "Name": "Iterations", "Type": "Int", "Desc": "Number of training iterations", "Aliases": [ - "iter" + "iters" ], "Required": false, "SortOrder": 2.0, @@ -10148,7 +10148,7 @@ "Default": "Features" }, { - "Name": "LatentDim", + "Name": "LatentDimension", "Type": "Int", "Desc": "Latent space dimension", "Aliases": [ @@ -10245,7 +10245,7 @@ "Default": "Auto" }, { - "Name": "Norm", + "Name": "Normalize", "Type": "Bool", "Desc": "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", "Aliases": [ From 3f7cf533fc81e738e28d82c03e542a1e6fd6b1da Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 26 Feb 2019 13:11:09 -0800 Subject: [PATCH 5/7] Resolved a compilation bug. 
--- src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index d320781c5c..1b36ca6591 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -104,8 +104,8 @@ public static IDataView[] LoadFeaturizedSentimentDataset(MLContext mlContext) var reader = mlContext.Data.CreateTextLoader( columns: new[] { - new TextLoader.Column("Sentiment", DataKind.BL, 0), - new TextLoader.Column("SentimentText", DataKind.Text, 1) + new TextLoader.Column("Sentiment", DataKind.Boolean, 0), + new TextLoader.Column("SentimentText", DataKind.String, 1) }, hasHeader: true ); From 30faca7982f9cfc5c9f4082798636a2497538c65 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 26 Feb 2019 15:24:02 -0800 Subject: [PATCH 6/7] Addressed reviewers' comments. --- .../FieldAwareFactorizationMachine.cs | 2 +- ...ldAwareFactorizationMachinewWithOptions.cs | 6 ++-- .../FactorizationMachineCatalog.cs | 12 +++---- .../FactorizationMachineTrainer.cs | 32 +++++++++---------- ...wareFactorizationMachineModelParameters.cs | 8 ++--- .../FactorizationMachineStatic.cs | 6 ++-- .../Common/EntryPoints/core_ep-list.tsv | 2 +- .../Common/EntryPoints/core_manifest.json | 5 +-- .../TrainerEstimators/FAFMEstimator.cs | 6 ++-- 9 files changed, 39 insertions(+), 40 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs index 55ec4d2349..294fdf678e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -48,7 +48,7 @@ public static void Example() Console.WriteLine("The linear weights of some of the features are: " + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); Console.WriteLine("The weights of some of the latent features are: " + - string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); // The feature count is: 9374 // The number of fields is: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs index 1a8d2eb27e..7e21c30845 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs @@ -30,12 +30,12 @@ public static void Example() var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) .Append(mlContext.BinaryClassification.Trainers. FieldAwareFactorizationMachine( - new FieldAwareFactorizationMachineTrainer.Options + new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { FeatureColumn = "Features", LabelColumn = "Sentiment", LearningRate = 0.1f, - Iterations = 10 + NumberOfIterations = 10 })); // Fit the model. 
@@ -57,7 +57,7 @@ public static void Example() Console.WriteLine("The linear weights of some of the features are: " + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); Console.WriteLine("The weights of some of the latent features are: " + - string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); // The feature count is: 9374 // The number of fields is: 1 diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs index 6177cb4b43..ed5d657d3d 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs @@ -9,7 +9,7 @@ namespace Microsoft.ML { /// - /// Extension method to create + /// Extension method to create /// public static class FactorizationMachineExtensions { @@ -26,14 +26,14 @@ public static class FactorizationMachineExtensions /// [!code-csharp[FieldAwareFactorizationMachine](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs)] /// ]]> /// - public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + public static FieldAwareFactorizationMachineBinaryClassificationTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string[] featureColumnNames, string labelColumnName = DefaultColumnNames.Label, string exampleWeightColumnName = null) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new FieldAwareFactorizationMachineTrainer(env, featureColumnNames, labelColumnName, exampleWeightColumnName); + return new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, featureColumnNames, labelColumnName, exampleWeightColumnName); } /// @@ -47,12 +47,12 @@ public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachi /// [!code-csharp[FieldAwareFactorizationMachine](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs)] /// ]]> /// - public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, - FieldAwareFactorizationMachineTrainer.Options options) + public static FieldAwareFactorizationMachineBinaryClassificationTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + FieldAwareFactorizationMachineBinaryClassificationTrainer.Options options) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new FieldAwareFactorizationMachineTrainer(env, options); + return new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, options); } } } diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 3505e41092..ebe9eeef6a 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ 
@@ -15,12 +15,12 @@
 using Microsoft.ML.Internal.Utilities;
 using Microsoft.ML.Trainers;
-[assembly: LoadableClass(FieldAwareFactorizationMachineTrainer.Summary, typeof(FieldAwareFactorizationMachineTrainer),
- typeof(FieldAwareFactorizationMachineTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) }
- , FieldAwareFactorizationMachineTrainer.UserName, FieldAwareFactorizationMachineTrainer.LoadName,
- FieldAwareFactorizationMachineTrainer.ShortName, DocName = "trainer/FactorizationMachine.md")]
+[assembly: LoadableClass(FieldAwareFactorizationMachineBinaryClassificationTrainer.Summary, typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer),
+ typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) }
+ , FieldAwareFactorizationMachineBinaryClassificationTrainer.UserName, FieldAwareFactorizationMachineBinaryClassificationTrainer.LoadName,
+ FieldAwareFactorizationMachineBinaryClassificationTrainer.ShortName, DocName = "trainer/FactorizationMachine.md")]
-[assembly: LoadableClass(typeof(void), typeof(FieldAwareFactorizationMachineTrainer), null, typeof(SignatureEntryPointModule), FieldAwareFactorizationMachineTrainer.LoadName)]
+[assembly: LoadableClass(typeof(void), typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer), null, typeof(SignatureEntryPointModule), FieldAwareFactorizationMachineBinaryClassificationTrainer.LoadName)]
 namespace Microsoft.ML.FactorizationMachine
 {
@@ -32,7 +32,7 @@ namespace Microsoft.ML.FactorizationMachine
 [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
 */
 ///
- public sealed class FieldAwareFactorizationMachineTrainer : ITrainer,
+ public sealed class FieldAwareFactorizationMachineBinaryClassificationTrainer : ITrainer,
 IEstimator
 {
 internal const string Summary = "Train a field-aware factorization machine for binary classification";
@@ -52,9 +52,9 @@ public sealed class Options : LearnerInputBaseWithWeight
 ///
 /// Number of training iterations.
 ///
- [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iters", SortOrder = 2)]
+ [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iters,iter", SortOrder = 2)]
 [TlcModule.SweepableLongParam(1, 100)]
- public int Iterations = 5;
+ public int NumberOfIterations = 5;
 ///
 /// Latent space dimension.
@@ -151,12 +151,12 @@ public sealed class Options : LearnerInputBaseWithWeight
 private float _radius;
 ///
- /// Initializes a new instance of through the class.
+ /// Initializes a new instance of through the class.
 ///
 /// The private instance of .
 /// An instance of the legacy to apply advanced parameters to the algorithm.
 [BestFriend]
- internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options options)
+ internal FieldAwareFactorizationMachineBinaryClassificationTrainer(IHostEnvironment env, Options options)
 {
 Contracts.CheckValue(env, nameof(env));
 _host = env.Register(LoadName);
@@ -178,14 +178,14 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options opt
 }
 ///
- /// Initializes a new instance of .
+ /// Initializes a new instance of .
 ///
 /// The private instance of .
 /// The name of column hosting the features. The i-th element stores feature column of the i-th field.
 /// The name of the label column.
 /// The name of the weight column (optional).
 [BestFriend]
- internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env,
+ internal FieldAwareFactorizationMachineBinaryClassificationTrainer(IHostEnvironment env,
 string[] featureColumnNames,
 string labelColumnName = DefaultColumnNames.Label,
 string weightColumnName = null)
@@ -218,13 +218,13 @@ private void Initialize(IHostEnvironment env, Options options)
 _host.CheckUserArg(options.LambdaLinear >= 0, nameof(options.LambdaLinear), "Must be non-negative");
 _host.CheckUserArg(options.LambdaLatent >= 0, nameof(options.LambdaLatent), "Must be non-negative");
 _host.CheckUserArg(options.LearningRate > 0, nameof(options.LearningRate), "Must be positive");
- _host.CheckUserArg(options.Iterations >= 0, nameof(options.Iterations), "Must be non-negative");
+ _host.CheckUserArg(options.NumberOfIterations >= 0, nameof(options.NumberOfIterations), "Must be non-negative");
 _latentDim = options.LatentDimension;
 _latentDimAligned = FieldAwareFactorizationMachineUtils.GetAlignedVectorLength(_latentDim);
 _lambdaLinear = options.LambdaLinear;
 _lambdaLatent = options.LambdaLatent;
 _learningRate = options.LearningRate;
- _numIterations = options.Iterations;
+ _numIterations = options.NumberOfIterations;
 _norm = options.Normalize;
 _shuffle = options.Shuffle;
 _verbose = options.Verbose;
@@ -514,12 +514,12 @@ internal static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnviro
 var host = env.Register("Train a field-aware factorization machine");
 host.CheckValue(input, nameof(input));
 EntryPointUtils.CheckInputArgs(host, input);
- return LearnerEntryPointsUtils.Train(host, input, () => new FieldAwareFactorizationMachineTrainer(host, input),
+ return LearnerEntryPointsUtils.Train(host, input, () => new FieldAwareFactorizationMachineBinaryClassificationTrainer(host, input),
 () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn));
 }
 ///
- /// Continues the training of a using an already trained and/or validation data,
+ /// Continues the training of a using an already trained and/or validation data,
 /// and returns a .
 ///
 public FieldAwareFactorizationMachinePredictionTransformer Fit(IDataView trainData,
diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
index f1c63c1b42..9160ea1331 100644
--- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
+++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
@@ -253,11 +253,9 @@ internal void CopyLatentWeightsTo(AlignedArray latentWeights)
 ///
 /// The linear coefficients of the features. It's the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
 ///
- public float[] GetLinearWeights()
+ public IReadOnlyList GetLinearWeights()
 {
- var linearWeights = new float[_linearWeights.Length];
- CopyLinearWeightsTo(linearWeights);
- return linearWeights;
+ return _linearWeights;
 }
 ///
@@ -267,7 +265,7 @@ public float[] GetLinearWeights()
 /// The k-th element in v_{j, f} is latentWeights[j * fieldCount * latentDim + f * latentDim + k].
 /// The size of the returned value is featureCount x fieldCount x latentDim.
 ///
- public float[] GetLatentWeights()
+ public IReadOnlyList GetLatentWeights()
 {
 var latentWeights = new float[FeatureCount * FieldCount * LatentDimension];
 for (int j = 0; j < FeatureCount; j++)
diff --git a/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs b/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs
index 160eb7ae0a..693ddf7015 100644
--- a/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs
@@ -40,7 +40,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto
 var rec = new CustomReconciler((env, labelCol, featureCols) =>
 {
- var trainer = new FieldAwareFactorizationMachineTrainer(env, featureCols, labelCol);
+ var trainer = new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, featureCols, labelCol);
 if (onFit != null)
 return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -66,7 +66,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto
 /// The predicted output.
 public static (Scalar score, Scalar predictedLabel) FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
 Scalar label, Vector[] features,
- FieldAwareFactorizationMachineTrainer.Options options,
+ FieldAwareFactorizationMachineBinaryClassificationTrainer.Options options,
 Action onFit = null)
 {
 Contracts.CheckValue(label, nameof(label));
@@ -77,7 +77,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto
 var rec = new CustomReconciler((env, labelCol, featureCols) =>
 {
- var trainer = new FieldAwareFactorizationMachineTrainer(env, options);
+ var trainer = new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, options);
 if (onFit != null)
 return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
 else
diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
index 172d030b01..c65d3c7aef 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
+++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
@@ -49,7 +49,7 @@ Trainers.FastTreeBinaryClassifier Uses a logit-boost boosted tree learner to per
 Trainers.FastTreeRanker Trains gradient boosted decision trees to the LambdaRank quasi-gradient. Microsoft.ML.Trainers.FastTree.FastTree TrainRanking Microsoft.ML.Trainers.FastTree.FastTreeRankingTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RankingOutput
 Trainers.FastTreeRegressor Trains gradient boosted decision trees to fit target values using least-squares. Microsoft.ML.Trainers.FastTree.FastTree TrainRegression Microsoft.ML.Trainers.FastTree.FastTreeRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput
 Trainers.FastTreeTweedieRegressor Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner is a generalization of Poisson, compound Poisson, and gamma regression. Microsoft.ML.Trainers.FastTree.FastTree TrainTweedieRegression Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput
-Trainers.FieldAwareFactorizationMachineBinaryClassifier Train a field-aware factorization machine for binary classification Microsoft.ML.FactorizationMachine.FieldAwareFactorizationMachineTrainer TrainBinary Microsoft.ML.FactorizationMachine.FieldAwareFactorizationMachineTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput
+Trainers.FieldAwareFactorizationMachineBinaryClassifier Train a field-aware factorization machine for binary classification Microsoft.ML.FactorizationMachine.FieldAwareFactorizationMachineBinaryClassificationTrainer TrainBinary Microsoft.ML.FactorizationMachine.FieldAwareFactorizationMachineBinaryClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput
 Trainers.GeneralizedAdditiveModelBinaryClassifier Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Trainers.FastTree.Gam TrainBinary Microsoft.ML.Trainers.FastTree.BinaryClassificationGamTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput
 Trainers.GeneralizedAdditiveModelRegressor Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Trainers.FastTree.Gam TrainRegression Microsoft.ML.Trainers.FastTree.RegressionGamTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput
 Trainers.KMeansPlusPlusClusterer K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better method for choosing the initial cluster centers. Microsoft.ML.Trainers.KMeans.KMeansPlusPlusTrainer TrainKMeans Microsoft.ML.Trainers.KMeans.KMeansPlusPlusTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+ClusteringOutput
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
index b3108abdac..ad3858fdca 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -10119,11 +10119,12 @@
 "IsNullable": false
 },
 {
- "Name": "Iterations",
+ "Name": "NumberOfIterations",
 "Type": "Int",
 "Desc": "Number of training iterations",
 "Aliases": [
- "iters"
+ "iters",
+ "iter"
 ],
 "Required": false,
 "SortOrder": 2.0,
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs
index 9ede57ee94..19d08918ed 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs
@@ -19,7 +19,7 @@ public void FfmBinaryClassificationWithAdvancedArguments()
 var data = DatasetUtils.GenerateFfmSamples(500);
 var dataView = mlContext.Data.ReadFromEnumerable(data);
- var ffmArgs = new FieldAwareFactorizationMachineTrainer.Options();
+ var ffmArgs = new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options();
 // Customized the field names.
 ffmArgs.FeatureColumn = nameof(DatasetUtils.FfmExample.Field0); // First field.
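Between the two test hunks, a brief aside on the field-aware setup they exercise: each field gets its own feature column, with the first field assigned to FeatureColumn and the remaining fields listed in ExtraFeatureColumns. A minimal sketch, not part of the patch; the column names here are hypothetical rather than taken from the tests.

    // Sketch only (not part of the diff): one feature column per field.
    var ffmArgs = new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options
    {
        FeatureColumn = "Field0Features",                  // features of the first field
        ExtraFeatureColumns = new[] { "Field1Features" },  // one column per additional field
        NumberOfIterations = 3,                            // renamed from Iterations in this patch
        LatentDimension = 7,
    };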
@@ -44,11 +44,11 @@ public void FieldAwareFactorizationMachine_Estimator()
 var data = new TextLoader(Env, GetFafmBCLoaderArgs())
 .Read(GetDataPath(TestDatasets.breastCancer.trainFilename));
- var ffmArgs = new FieldAwareFactorizationMachineTrainer.Options {
+ var ffmArgs = new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options {
 FeatureColumn = "Feature1", // Features from the 1st field.
 ExtraFeatureColumns = new[] { "Feature2", "Feature3", "Feature4" }, // 2nd field's feature column, 3rd field's feature column, 4th field's feature column.
 Shuffle = false,
- Iterations = 3,
+ NumberOfIterations = 3,
 LatentDimension = 7,
 };

From 003b6a0c21165b6bda5f9c722f4f9fab2874fc83 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed
Date: Tue, 26 Feb 2019 16:42:11 -0800
Subject: [PATCH 7/7] Addressed reviewers' comments.

---
 .../BinaryClassification/FieldAwareFactorizationMachine.cs | 1 +
 .../FieldAwareFactorizationMachinewWithOptions.cs | 2 +-
 .../FieldAwareFactorizationMachineModelParameters.cs | 5 +----
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
index 294fdf678e..8c87c899a2 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
@@ -1,6 +1,7 @@
 using System;
 using System.Linq;
 using Microsoft.ML.Data;
+
 namespace Microsoft.ML.Samples.Dynamic
 {
 public static class FFMBinaryClassification
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs
index 7e21c30845..c93b735a59 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs
@@ -1,7 +1,7 @@
 using System;
 using System.Linq;
 using Microsoft.ML.Data;
-using Microsoft.ML.FactorizationMachine;
+using Microsoft.ML.Trainers.FactorizationMachine;
 namespace Microsoft.ML.Samples.Dynamic
 {
diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
index 9fcc39e716..8779d5a48c 100644
--- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
+++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs
@@ -253,10 +253,7 @@ internal void CopyLatentWeightsTo(AlignedArray latentWeights)
 ///
 /// The linear coefficients of the features. It's the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
 ///
- public IReadOnlyList GetLinearWeights()
- {
- return _linearWeights;
- }
+ public IReadOnlyList GetLinearWeights() => _linearWeights;
 ///
 /// Latent representation of each feature. Note that one feature may have latent vectors
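A closing note on consuming the accessors touched in this last file: GetLinearWeights and GetLatentWeights now return read-only lists, and the latent weights keep the flattened layout documented in the earlier hunk. A minimal sketch, assuming a trained FieldAwareFactorizationMachineModelParameters instance `modelParams` plus indices `j` (feature), `f` (field), `k` (latent component) and known `fieldCount` and `latentDim` values; none of these variables come from the diff itself.

    // Sketch only (not part of the diff): indexing the flattened latent weights.
    // v_{j,f}[k] lives at latentWeights[j * fieldCount * latentDim + f * latentDim + k].
    var linearWeights = modelParams.GetLinearWeights();   // w_j, one value per feature
    var latentWeights = modelParams.GetLatentWeights();   // flattened v_{j,f} vectors
    float wj   = linearWeights[j];
    float vjfk = latentWeights[j * fieldCount * latentDim + f * latentDim + k];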