diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs index 30f95df416..9dc343f7c1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs @@ -16,7 +16,7 @@ public static void Example() // This will create a sentiment.tsv file in the filesystem. // The string, dataFile, is the path to the downloaded file. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs deleted file mode 100644 index 678796d54e..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FieldAwareFactorizationMachine.cs +++ /dev/null @@ -1,71 +0,0 @@ -using System; -using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic -{ - public static class FFMBinaryClassification - { - public static void Example() - { - // Downloading the dataset from github.com/dotnet/machinelearning. - // This will create a sentiment.tsv file in the filesystem. - // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); - - // A preview of the data. - // Sentiment SentimentText - // 0 " :Erm, thank you. " - // 1 ==You're cool== - - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(); - - // Step 1: Read the data as an IDataView. - // First, we define the reader: specify the data columns and where to find them in the text file. 
- var reader = mlContext.Data.CreateTextLoader( - columns: new[] - { - new TextLoader.Column("Sentiment", DataKind.Boolean, 0), - new TextLoader.Column("SentimentText", DataKind.String, 1) - }, - hasHeader: true - ); - - // Read the data - var data = reader.Read(dataFile); - - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to - // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially - // helpful when working with iterative algorithms which needs many data passes. Since SDCA is the case, we cache. Inserting a - // cache step in a pipeline is also possible, please see the construction of pipeline below. - data = mlContext.Data.Cache(data); - - // Step 2: Pipeline - // Featurize the text column through the FeaturizeText API. - // Then append a binary classifier, setting the "Label" column as the label of the dataset, and - // the "Features" column produced by FeaturizeText as the features column. - var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") - .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. - .Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); - - // Fit the model. - var model = pipeline.Fit(data); - - // Let's get the model parameters from the model. - var modelParams = model.LastTransformer.Model; - - // Let's inspect the model parameters. 
- var featureCount = modelParams.GetFeatureCount(); - var fieldCount = modelParams.GetFieldCount(); - var latentDim = modelParams.GetLatentDim(); - var linearWeights = modelParams.GetLinearWeights(); - var latentWeights = modelParams.GetLatentWeights(); - - Console.WriteLine("The feature count is: " + featureCount); - Console.WriteLine("The number of fields is: " + fieldCount); - Console.WriteLine("The latent dimension is: " + latentDim); - Console.WriteLine("The lineear weights of the features are: " + string.Join(", ", linearWeights)); - Console.WriteLine("The weights of the latent features are: " + string.Join(", ", latentWeights)); - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs new file mode 100644 index 0000000000..8c87c899a2 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -0,0 +1,75 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class FFMBinaryClassification + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var trainData = dataviews[0]; + var testData = dataviews[1]; + + // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to + // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. 
Caching is especially + // helpful when working with iterative algorithms which needs many data passes. Since SDCA is the case, we cache. Inserting a + // cache step in a pipeline is also possible, please see the construction of pipeline below. + trainData = mlContext.Data.Cache(trainData); + + // Step 2: Pipeline + // Create the 'FieldAwareFactorizationMachine' binary classifier, setting the "Sentiment" column as the label of the dataset, and + // the "Features" column as the features column. + var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) + .Append(mlContext.BinaryClassification.Trainers. + FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" })); + + // Fit the model. + var model = pipeline.Fit(trainData); + + // Let's get the model parameters from the model. + var modelParams = model.LastTransformer.Model; + + // Let's inspect the model parameters. + var featureCount = modelParams.FeatureCount; + var fieldCount = modelParams.FieldCount; + var latentDim = modelParams.LatentDimension; + var linearWeights = modelParams.GetLinearWeights(); + var latentWeights = modelParams.GetLatentWeights(); + + Console.WriteLine("The feature count is: " + featureCount); + Console.WriteLine("The number of fields is: " + fieldCount); + Console.WriteLine("The latent dimension is: " + latentDim); + Console.WriteLine("The linear weights of some of the features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); + Console.WriteLine("The weights of some of the latent features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + + // The feature count is: 9374 + // The number of fields is: 1 + // The latent dimension is: 20 + // The linear weights of some of the features are: 0.0196 0.0000 -0.0045 -0.0205 0.0000 0.0032 0.0682 0.0091 -0.0151 0.0089 + // The weights of some of the latent features are: 0.3316 0.2140 0.0752 0.0908 -0.0495 
-0.0810 0.0761 0.0966 0.0090 -0.0962 + + // Evaluate how the model is doing on the test data. + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Accuracy: 0.72 + // AUC: 0.75 + // F1 Score: 0.74 + // Negative Precision: 0.75 + // Negative Recall: 0.67 + // Positive Precision: 0.70 + // Positive Recall: 0.78 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs new file mode 100644 index 0000000000..c93b735a59 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs @@ -0,0 +1,83 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers.FactorizationMachine; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class FFMBinaryClassificationWithOptions + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var trainData = dataviews[0]; + var testData = dataviews[1]; + + // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, it can be slow due to + // expensive featurization and disk operations. When the considered data can fit into memory, a solution is to cache the data in memory. Caching is especially + // helpful when working with iterative algorithms which needs many data passes. 
Since SDCA is the case, we cache. Inserting a + // cache step in a pipeline is also possible, please see the construction of pipeline below. + trainData = mlContext.Data.Cache(trainData); + + // Step 2: Pipeline + // Create the 'FieldAwareFactorizationMachine' binary classifier, setting the "Sentiment" column as the label of the dataset, and + // the "Features" column as the features column. + var pipeline = new EstimatorChain().AppendCacheCheckpoint(mlContext) + .Append(mlContext.BinaryClassification.Trainers. + FieldAwareFactorizationMachine( + new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options + { + FeatureColumn = "Features", + LabelColumn = "Sentiment", + LearningRate = 0.1f, + NumberOfIterations = 10 + })); + + // Fit the model. + var model = pipeline.Fit(trainData); + + // Let's get the model parameters from the model. + var modelParams = model.LastTransformer.Model; + + // Let's inspect the model parameters. + var featureCount = modelParams.FeatureCount; + var fieldCount = modelParams.FieldCount; + var latentDim = modelParams.LatentDimension; + var linearWeights = modelParams.GetLinearWeights(); + var latentWeights = modelParams.GetLatentWeights(); + + Console.WriteLine("The feature count is: " + featureCount); + Console.WriteLine("The number of fields is: " + fieldCount); + Console.WriteLine("The latent dimension is: " + latentDim); + Console.WriteLine("The linear weights of some of the features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{linearWeights[i]:F4} "))); + Console.WriteLine("The weights of some of the latent features are: " + + string.Concat(Enumerable.Range(1, 10).Select(i => $"{latentWeights[i]:F4} "))); + + // The feature count is: 9374 + // The number of fields is: 1 + // The latent dimension is: 20 + // The linear weights of some of the features are: 0.0410 0.0000 -0.0078 -0.0285 0.0000 0.0114 0.1313 0.0183 -0.0224 0.0166 + // The weights of some of the latent features are: -0.0326 0.1127 0.0621 0.1446 
0.2038 0.1608 0.2084 0.0141 0.2458 -0.0625 + + // Evaluate how the model is doing on the test data. + var dataWithPredictions = model.Transform(testData); + + var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); + SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Accuracy: 0.78 + // AUC: 0.81 + // F1 Score: 0.78 + // Negative Precision: 0.78 + // Negative Recall: 0.78 + // Positive Precision: 0.78 + // Positive Recall: 0.78 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs index 0b5347ebc0..db34ef01db 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs @@ -12,7 +12,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs index 142ad64362..313b73f4ca 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. 
- string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs index a58b5cf100..a2aa752643 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs @@ -10,7 +10,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index aebf45c592..63f92ff47d 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -22,6 +22,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 3b1e68d42e..5196139ff8 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -77,14 +77,48 @@ public sealed class HousingRegression /// /// Downloads the wikipedia detox dataset from the ML.NET repo. 
/// - public static string DownloadSentimentDataset() - => Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-data.tsv", "sentiment.tsv"); + public static string[] DownloadSentimentDataset() + { + var trainFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-data.tsv", "sentiment.tsv"); + var testFile = Download("https://raw.githubusercontent.com/dotnet/machinelearning/76cb2cdf5cc8b6c88ca44b8969153836e589df04/test/data/wikipedia-detox-250-line-test.tsv", "sentimenttest.tsv"); + return new[] { trainFile, testFile }; + } + + /// + /// Downloads the adult dataset from the ML.NET repo. + /// + public static string DownloadAdultDataset() + => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); /// - /// Downloads the adult dataset from the ML.NET repo. + /// Downloads the wikipedia detox dataset and featurizes it to be suitable for sentiment classification tasks. /// - public static string DownloadAdultDataset() - => Download("https://raw.githubusercontent.com/dotnet/machinelearning/244a8c2ac832657af282aa312d568211698790aa/test/data/adult.train", "adult.txt"); + /// used for data loading and processing. + /// Featurized train and test dataset. 
+ public static IDataView[] LoadFeaturizedSentimentDataset(MLContext mlContext) + { + // Download the files + var dataFiles = DownloadSentimentDataset(); + + // Define the columns to read + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Sentiment", DataKind.Boolean, 0), + new TextLoader.Column("SentimentText", DataKind.String, 1) + }, + hasHeader: true + ); + + // Create data featurizing pipeline + var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText"); + + var data = reader.Read(dataFiles[0]); + var model = pipeline.Fit(data); + var featurizedDataTrain = model.Transform(data); + var featurizedDataTest = model.Transform(reader.Read(dataFiles[1])); + return new[] { featurizedDataTrain, featurizedDataTest }; + } /// /// Downloads the Adult UCI dataset and featurizes it to be suitable for classification tasks. diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs index f493f84cf5..4ffffafd7e 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs @@ -8,7 +8,7 @@ namespace Microsoft.ML { /// - /// Extension method to create + /// Extension method to create /// public static class FactorizationMachineExtensions { @@ -22,17 +22,17 @@ public static class FactorizationMachineExtensions /// /// /// /// - public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + public static FieldAwareFactorizationMachineBinaryClassificationTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, string[] featureColumnNames, string labelColumnName = DefaultColumnNames.Label, string exampleWeightColumnName = 
null) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new FieldAwareFactorizationMachineTrainer(env, featureColumnNames, labelColumnName, exampleWeightColumnName); + return new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, featureColumnNames, labelColumnName, exampleWeightColumnName); } /// @@ -40,12 +40,18 @@ public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachi /// /// The binary classification catalog trainer object. /// Advanced arguments to the algorithm. - public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, - FieldAwareFactorizationMachineTrainer.Options options) + /// + /// + /// + /// + public static FieldAwareFactorizationMachineBinaryClassificationTrainer FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + FieldAwareFactorizationMachineBinaryClassificationTrainer.Options options) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new FieldAwareFactorizationMachineTrainer(env, options); + return new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, options); } } } diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 1f10456456..77974e1457 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -14,24 +14,24 @@ using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Trainers.FactorizationMachine; -[assembly: LoadableClass(FieldAwareFactorizationMachineTrainer.Summary, typeof(FieldAwareFactorizationMachineTrainer), - 
typeof(FieldAwareFactorizationMachineTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) } - , FieldAwareFactorizationMachineTrainer.UserName, FieldAwareFactorizationMachineTrainer.LoadName, - FieldAwareFactorizationMachineTrainer.ShortName, DocName = "trainer/FactorizationMachine.md")] +[assembly: LoadableClass(FieldAwareFactorizationMachineBinaryClassificationTrainer.Summary, typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer), + typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) } + , FieldAwareFactorizationMachineBinaryClassificationTrainer.UserName, FieldAwareFactorizationMachineBinaryClassificationTrainer.LoadName, + FieldAwareFactorizationMachineBinaryClassificationTrainer.ShortName, DocName = "trainer/FactorizationMachine.md")] -[assembly: LoadableClass(typeof(void), typeof(FieldAwareFactorizationMachineTrainer), null, typeof(SignatureEntryPointModule), FieldAwareFactorizationMachineTrainer.LoadName)] +[assembly: LoadableClass(typeof(void), typeof(FieldAwareFactorizationMachineBinaryClassificationTrainer), null, typeof(SignatureEntryPointModule), FieldAwareFactorizationMachineBinaryClassificationTrainer.LoadName)] namespace Microsoft.ML.Trainers.FactorizationMachine { /* Train a field-aware factorization machine using ADAGRAD (an advanced stochastic gradient method). See references below - for details. This trainer is essentially faster the one introduced in [2] because of some implementation tricks[3]. + for details. This trainer is essentially faster than the one introduced in [2] because of some implementation tricks in [3]. 
[1] http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf [2] https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf */ /// - public sealed class FieldAwareFactorizationMachineTrainer : ITrainer, + public sealed class FieldAwareFactorizationMachineBinaryClassificationTrainer : ITrainer, IEstimator { internal const string Summary = "Train a field-aware factorization machine for binary classification"; @@ -41,28 +41,46 @@ public sealed class FieldAwareFactorizationMachineTrainer : ITrainer + /// Initial learning rate. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate", ShortName = "lr", SortOrder = 1)] [TlcModule.SweepableFloatParam(0.001f, 1.0f, isLogScale: true)] public float LearningRate = (float)0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iter", SortOrder = 2)] + /// + /// Number of training iterations. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations", ShortName = "iters,iter", SortOrder = 2)] [TlcModule.SweepableLongParam(1, 100)] - public int Iters = 5; + public int NumberOfIterations = 5; + /// + /// Latent space dimension. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Latent space dimension", ShortName = "d", SortOrder = 3)] [TlcModule.SweepableLongParam(4, 100)] - public int LatentDim = 20; + public int LatentDimension = 20; + /// + /// Regularization coefficient of linear weights. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization coefficient of linear weights", ShortName = "lambdaLinear", SortOrder = 4)] [TlcModule.SweepableFloatParam(1e-8f, 1f, isLogScale: true)] public float LambdaLinear = 0.0001f; + /// + /// Regularization coefficient of latent weights. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization coefficient of latent weights", ShortName = "lambdaLatent", SortOrder = 5)] [TlcModule.SweepableFloatParam(1e-8f, 1f, isLogScale: true)] public float LambdaLatent = 0.0001f; + /// + /// Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", ShortName = "norm", SortOrder = 6)] - public bool Norm = true; + public bool Normalize = true; /// /// Extra feature column names. The column named stores features from the first field. @@ -73,12 +91,21 @@ public sealed class Options : LearnerInputBaseWithWeight ShortName = "exfeat", SortOrder = 7)] public string[] ExtraFeatureColumns; + /// + /// Whether to shuffle for each training iteration. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to shuffle for each training iteration", ShortName = "shuf", SortOrder = 90)] public bool Shuffle = true; + /// + /// Report traning progress or not. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Report traning progress or not", ShortName = "verbose", SortOrder = 91)] public bool Verbose = true; + /// + /// Radius of initial latent factors. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Radius of initial latent factors", ShortName = "rad", SortOrder = 110)] [TlcModule.SweepableFloatParam(0.1f, 1f)] public float Radius = 0.5f; @@ -123,12 +150,12 @@ public sealed class Options : LearnerInputBaseWithWeight private float _radius; /// - /// Initializes a new instance of through the class. + /// Initializes a new instance of through the class. /// /// The private instance of . /// An instance of the legacy to apply advanced parameters to the algorithm. 
[BestFriend] - internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options options) + internal FieldAwareFactorizationMachineBinaryClassificationTrainer(IHostEnvironment env, Options options) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(LoadName); @@ -150,17 +177,17 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Options opt } /// - /// Initializes a new instance of . + /// Initializes a new instance of . /// /// The private instance of . - /// The name of column hosting the features. The i-th element stores feature column of the i-th field. - /// The name of the label column. - /// The name of the optional weights' column. + /// The name of column hosting the features. The i-th element stores feature column of the i-th field. + /// The name of the label column. + /// The name of the weight column (optional). [BestFriend] - internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, - string[] featureColumns, - string labelColumn = DefaultColumnNames.Label, - string weights = null) + internal FieldAwareFactorizationMachineBinaryClassificationTrainer(IHostEnvironment env, + string[] featureColumnNames, + string labelColumnName = DefaultColumnNames.Label, + string weightColumnName = null) { Contracts.CheckValue(env, nameof(env)); _host = env.Register(LoadName); @@ -169,13 +196,13 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Initialize(env, args); - FeatureColumns = new SchemaShape.Column[featureColumns.Length]; + FeatureColumns = new SchemaShape.Column[featureColumnNames.Length]; - for (int i = 0; i < featureColumns.Length; i++) - FeatureColumns[i] = new SchemaShape.Column(featureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); + for (int i = 0; i < featureColumnNames.Length; i++) + FeatureColumns[i] = new SchemaShape.Column(featureColumnNames[i], SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); - LabelColumn = new 
SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false); - WeightColumn = weights != null ? new SchemaShape.Column(weights, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; + LabelColumn = new SchemaShape.Column(labelColumnName, SchemaShape.Column.VectorKind.Scalar, BooleanDataViewType.Instance, false); + WeightColumn = weightColumnName != null ? new SchemaShape.Column(weightColumnName, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false) : default; } /// @@ -186,18 +213,18 @@ internal FieldAwareFactorizationMachineTrainer(IHostEnvironment env, /// private void Initialize(IHostEnvironment env, Options options) { - _host.CheckUserArg(options.LatentDim > 0, nameof(options.LatentDim), "Must be positive"); + _host.CheckUserArg(options.LatentDimension > 0, nameof(options.LatentDimension), "Must be positive"); _host.CheckUserArg(options.LambdaLinear >= 0, nameof(options.LambdaLinear), "Must be non-negative"); _host.CheckUserArg(options.LambdaLatent >= 0, nameof(options.LambdaLatent), "Must be non-negative"); _host.CheckUserArg(options.LearningRate > 0, nameof(options.LearningRate), "Must be positive"); - _host.CheckUserArg(options.Iters >= 0, nameof(options.Iters), "Must be non-negative"); - _latentDim = options.LatentDim; + _host.CheckUserArg(options.NumberOfIterations >= 0, nameof(options.NumberOfIterations), "Must be non-negative"); + _latentDim = options.LatentDimension; _latentDimAligned = FieldAwareFactorizationMachineUtils.GetAlignedVectorLength(_latentDim); _lambdaLinear = options.LambdaLinear; _lambdaLatent = options.LambdaLatent; _learningRate = options.LearningRate; - _numIterations = options.Iters; - _norm = options.Norm; + _numIterations = options.NumberOfIterations; + _norm = options.Normalize; _shuffle = options.Shuffle; _verbose = options.Verbose; _radius = options.Radius; @@ -341,7 +368,7 @@ private FieldAwareFactorizationMachineModelParameters 
TrainCore(IChannel ch, IPr if (predictor != null) { ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count"); - ch.Check(predictor.LatentDim == _latentDim, "Input model's latent dimension mismatches trainer's"); + ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's"); } if (validData != null) { @@ -486,12 +513,12 @@ internal static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnviro var host = env.Register("Train a field-aware factorization machine"); host.CheckValue(input, nameof(input)); EntryPointUtils.CheckInputArgs(host, input); - return LearnerEntryPointsUtils.Train(host, input, () => new FieldAwareFactorizationMachineTrainer(host, input), + return LearnerEntryPointsUtils.Train(host, input, () => new FieldAwareFactorizationMachineBinaryClassificationTrainer(host, input), () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn)); } /// - /// Continues the training of a using an already trained and/or validation data, + /// Continues the training of a using an already trained and/or validation data, /// and returns a . 
/// public FieldAwareFactorizationMachinePredictionTransformer Fit(IDataView trainData, diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs index e0cee3498a..8779d5a48c 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs @@ -26,9 +26,22 @@ public sealed class FieldAwareFactorizationMachineModelParameters : ModelParamet internal const string LoaderSignature = "FieldAwareFactMacPredict"; private protected override PredictionKind PredictionKind => PredictionKind.BinaryClassification; private bool _norm; - internal int FieldCount { get; } - internal int FeatureCount { get; } - internal int LatentDim { get; } + + /// + /// Get the number of fields. It's the symbol `m` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int FieldCount { get; } + + /// + /// Get the number of features. It's the symbol `n` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int FeatureCount { get; } + + /// + /// Get the latent dimension. 
It's the length of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf + /// + public int LatentDimension { get; } + internal int LatentDimAligned { get; } private readonly float[] _linearWeights; private readonly AlignedArray _latentWeightsAligned; @@ -54,7 +67,7 @@ private static VersionInfo GetVersionInfo() /// The latent dimensions, which is the length of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// The linear coefficients of the features, which is the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// Latent representation of each feature. Note that one feature may have latent vectors - /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc + /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc /// https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf, starts at latentWeights[j * fieldCount * latentDim + f * latentDim]. /// The k-th element in v_{j, f} is latentWeights[j * fieldCount * latentDim + f * latentDim + k]. The size of the array must be featureCount x fieldCount x latentDim.
internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, bool norm, int fieldCount, int featureCount, int latentDim, @@ -70,7 +83,7 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = new AlignedArray(FeatureCount * FieldCount * LatentDimAligned, 16); @@ -79,11 +92,11 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo { for (int f = 0; f < FieldCount; f++) { - int index = j * FieldCount * LatentDim + f * LatentDim; + int index = j * FieldCount * LatentDimension + f * LatentDimension; int indexAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; for (int k = 0; k < LatentDimAligned; k++) { - if (k < LatentDim) + if (k < LatentDimension) _latentWeightsAligned[indexAligned + k] = latentWeights[index + k]; else _latentWeightsAligned[indexAligned + k] = 0; @@ -105,7 +118,7 @@ internal FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, boo _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = latentWeightsAligned; } @@ -139,18 +152,18 @@ private FieldAwareFactorizationMachineModelParameters(IHostEnvironment env, Mode _norm = norm; FieldCount = fieldCount; FeatureCount = featureCount; - LatentDim = latentDim; + LatentDimension = latentDim; _linearWeights = linearWeights; _latentWeightsAligned = new AlignedArray(FeatureCount * FieldCount * LatentDimAligned, 16); for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int vBias = j * FieldCount * LatentDim + f * LatentDim; + int vBias = j * FieldCount * LatentDimension + f * LatentDimension; int vBiasAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; for (int k = 0; 
k < LatentDimAligned; k++) { - if (k < LatentDim) + if (k < LatentDimension) _latentWeightsAligned[vBiasAligned + k] = latentWeights[vBias + k]; else _latentWeightsAligned[vBiasAligned + k] = 0; @@ -185,23 +198,23 @@ private protected override void SaveCore(ModelSaveContext ctx) Host.Assert(FieldCount > 0); Host.Assert(FeatureCount > 0); - Host.Assert(LatentDim > 0); + Host.Assert(LatentDimension > 0); Host.Assert(Utils.Size(_linearWeights) == FeatureCount); Host.Assert(_latentWeightsAligned.Size == FeatureCount * FieldCount * LatentDimAligned); ctx.Writer.Write(_norm); ctx.Writer.Write(FieldCount); ctx.Writer.Write(FeatureCount); - ctx.Writer.Write(LatentDim); + ctx.Writer.Write(LatentDimension); ctx.Writer.WriteSingleArray(_linearWeights); - float[] latentWeights = new float[FeatureCount * FieldCount * LatentDim]; + float[] latentWeights = new float[FeatureCount * FieldCount * LatentDimension]; for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int vBias = j * FieldCount * LatentDim + f * LatentDim; + int vBias = j * FieldCount * LatentDimension + f * LatentDimension; int vBiasAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; - for (int k = 0; k < LatentDim; k++) + for (int k = 0; k < LatentDimension; k++) latentWeights[vBias + k] = _latentWeightsAligned[vBiasAligned + k]; } } @@ -237,43 +250,28 @@ internal void CopyLatentWeightsTo(AlignedArray latentWeights) latentWeights.CopyFrom(_latentWeightsAligned); } - /// - /// Get the number of fields. It's the symbol `m` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetFieldCount() => FieldCount; - - /// - /// Get the number of features. It's the symbol `n` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetFeatureCount() => FeatureCount; - - /// - /// Get the latent dimension. 
It's the tlngth of `v_{j, f}` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf - /// - public int GetLatentDim() => LatentDim; - /// /// The linear coefficients of the features. It's the symbol `w` in the doc: https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf /// - public float[] GetLinearWeights() => _linearWeights; + public IReadOnlyList GetLinearWeights() => _linearWeights; /// /// Latent representation of each feature. Note that one feature may have latent vectors - /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc + /// and each latent vector contains values. In the f-th field, the j-th feature's latent vector, `v_{j, f}` in the doc /// https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf, starts at latentWeights[j * fieldCount * latentDim + f * latentDim]. /// The k-th element in v_{j, f} is latentWeights[j * fieldCount * latentDim + f * latentDim + k]. /// The size of the returned value is featureCount x fieldCount x latentDim. 
/// - public float[] GetLatentWeights() + public IReadOnlyList GetLatentWeights() { - var latentWeights = new float[FeatureCount * FieldCount * LatentDim]; + var latentWeights = new float[FeatureCount * FieldCount * LatentDimension]; for (int j = 0; j < FeatureCount; j++) { for (int f = 0; f < FieldCount; f++) { - int index = j * FieldCount * LatentDim + f * LatentDim; + int index = j * FieldCount * LatentDimension + f * LatentDimension; int indexAligned = j * FieldCount * LatentDimAligned + f * LatentDimAligned; - for (int k = 0; k < LatentDim; k++) + for (int k = 0; k < LatentDimension; k++) { latentWeights[index + k] = _latentWeightsAligned[indexAligned + k]; } diff --git a/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs b/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs index f0d80ddd9a..86e73b289f 100644 --- a/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs +++ b/src/Microsoft.ML.StaticPipe/FactorizationMachineStatic.cs @@ -39,7 +39,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto var rec = new CustomReconciler((env, labelCol, featureCols) => { - var trainer = new FieldAwareFactorizationMachineTrainer(env, featureCols, labelCol); + var trainer = new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, featureCols, labelCol); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); @@ -65,7 +65,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto /// The predicted output. 
public static (Scalar score, Scalar predictedLabel) FieldAwareFactorizationMachine(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, Scalar label, Vector[] features, - FieldAwareFactorizationMachineTrainer.Options options, + FieldAwareFactorizationMachineBinaryClassificationTrainer.Options options, Action onFit = null) { Contracts.CheckValue(label, nameof(label)); @@ -76,7 +76,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto var rec = new CustomReconciler((env, labelCol, featureCols) => { - var trainer = new FieldAwareFactorizationMachineTrainer(env, options); + var trainer = new FieldAwareFactorizationMachineBinaryClassificationTrainer(env, options); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); else diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index c4d63ccb72..32d75ad5bd 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -49,7 +49,7 @@ Trainers.FastTreeBinaryClassifier Uses a logit-boost boosted tree learner to per Trainers.FastTreeRanker Trains gradient boosted decision trees to the LambdaRank quasi-gradient. Microsoft.ML.Trainers.FastTree.FastTree TrainRanking Microsoft.ML.Trainers.FastTree.FastTreeRankingTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RankingOutput Trainers.FastTreeRegressor Trains gradient boosted decision trees to fit target values using least-squares. Microsoft.ML.Trainers.FastTree.FastTree TrainRegression Microsoft.ML.Trainers.FastTree.FastTreeRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.FastTreeTweedieRegressor Trains gradient boosted decision trees to fit target values using a Tweedie loss function. This learner is a generalization of Poisson, compound Poisson, and gamma regression. 
Microsoft.ML.Trainers.FastTree.FastTree TrainTweedieRegression Microsoft.ML.Trainers.FastTree.FastTreeTweedieTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput -Trainers.FieldAwareFactorizationMachineBinaryClassifier Train a field-aware factorization machine for binary classification Microsoft.ML.Trainers.FactorizationMachine.FieldAwareFactorizationMachineTrainer TrainBinary Microsoft.ML.Trainers.FactorizationMachine.FieldAwareFactorizationMachineTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput +Trainers.FieldAwareFactorizationMachineBinaryClassifier Train a field-aware factorization machine for binary classification Microsoft.ML.Trainers.FactorizationMachine.FieldAwareFactorizationMachineBinaryClassificationTrainer TrainBinary Microsoft.ML.Trainers.FactorizationMachine.FieldAwareFactorizationMachineBinaryClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.GeneralizedAdditiveModelBinaryClassifier Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Trainers.FastTree.Gam TrainBinary Microsoft.ML.Trainers.FastTree.BinaryClassificationGamTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.GeneralizedAdditiveModelRegressor Trains a gradient boosted stump per feature, on all features simultaneously, to fit target values using least-squares. It mantains no interactions between features. Microsoft.ML.Trainers.FastTree.Gam TrainRegression Microsoft.ML.Trainers.FastTree.RegressionGamTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.KMeansPlusPlusClusterer K-means is a popular clustering algorithm. With K-means, the data is clustered into a specified number of clusters in order to minimize the within-cluster sum of squares. 
K-means++ improves upon K-means by using a better method for choosing the initial cluster centers. Microsoft.ML.Trainers.KMeans.KMeansPlusPlusTrainer TrainKMeans Microsoft.ML.Trainers.KMeans.KMeansPlusPlusTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+ClusteringOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index f4d5e90e7c..ad3858fdca 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -10119,10 +10119,11 @@ "IsNullable": false }, { - "Name": "Iters", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of training iterations", "Aliases": [ + "iters", "iter" ], "Required": false, @@ -10148,7 +10149,7 @@ "Default": "Features" }, { - "Name": "LatentDim", + "Name": "LatentDimension", "Type": "Int", "Desc": "Latent space dimension", "Aliases": [ @@ -10245,7 +10246,7 @@ "Default": "Auto" }, { - "Name": "Norm", + "Name": "Normalize", "Type": "Bool", "Desc": "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", "Aliases": [ diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs index 144e603c9b..6e53eab6ad 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs @@ -19,7 +19,7 @@ public void FfmBinaryClassificationWithAdvancedArguments() var data = DatasetUtils.GenerateFfmSamples(500); var dataView = mlContext.Data.ReadFromEnumerable(data); - var ffmArgs = new FieldAwareFactorizationMachineTrainer.Options(); + var ffmArgs = new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options(); // Customized the field names. ffmArgs.FeatureColumn = nameof(DatasetUtils.FfmExample.Field0); // First field. 
@@ -44,12 +44,12 @@ public void FieldAwareFactorizationMachine_Estimator() var data = new TextLoader(Env, GetFafmBCLoaderArgs()) .Read(GetDataPath(TestDatasets.breastCancer.trainFilename)); - var ffmArgs = new FieldAwareFactorizationMachineTrainer.Options { + var ffmArgs = new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { FeatureColumn = "Feature1", // Features from the 1st field. ExtraFeatureColumns = new[] { "Feature2", "Feature3", "Feature4" }, // 2nd field's feature column, 3rd field's feature column, 4th field's feature column. Shuffle = false, - Iters = 3, - LatentDim = 7, + NumberOfIterations = 3, + LatentDimension = 7, }; var est = ML.BinaryClassification.Trainers.FieldAwareFactorizationMachine(ffmArgs);