Skip to content

Enable implicit-feedback recommendation via one-class matrix factorization #1664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 20, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,19 @@ namespace Microsoft.ML.Trainers
public sealed class MatrixFactorizationTrainer : TrainerBase<MatrixFactorizationPredictor>,
IEstimator<MatrixFactorizationPredictionTransformer>
{
public enum LibMFLossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 };

public sealed class Arguments
{
/// <summary>
/// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The value 0 means traditional collaborative filtering
/// problem with squared loss. The value 12 triggers one-class matrix factorization for implicit-feedback recommendation problem.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
[TGUI(SuggestedSweeps = "0,12")]
[TlcModule.SweepableDiscreteParam("Fun", new object[] { LibMFLossFunctionType.SquareLossRegression, LibMFLossFunctionType.SquareLossOneClass })]
public LibMFLossFunctionType Fun = LibMFLossFunctionType.SquareLossRegression;

[Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
"It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " +
"A small value could cause over-fitting.")]
Expand All @@ -116,6 +127,24 @@ public sealed class Arguments
[TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
public double Eta = 0.1;

/// <summary>
/// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
[TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
[TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})]
public double Alpha = 0.1;

/// <summary>
/// Desired negative entries' value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one
/// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary
/// classification) need to be specified manually using this option.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
[TGUI(SuggestedSweeps = "0.000001,0,0001,0.01")]
[TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
public double C = 0.000001f;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")]
public int? NumThreads;

Expand All @@ -131,10 +160,13 @@ public sealed class Arguments
+ "and the values of the matrix are ratings. ";

// LIBMF's parameter
private readonly int _fun;
private readonly double _lambda;
private readonly int _k;
private readonly int _iter;
private readonly double _eta;
private readonly double _alpha;
private readonly double _c;
private readonly int _threads;
private readonly bool _quiet;
private readonly bool _doNmf;
Expand Down Expand Up @@ -193,10 +225,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e
Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError);
Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError);

_fun = (int)args.Fun;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -224,10 +259,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env,
var args = new Arguments();
advancedSettings?.Invoke(args);

_fun = (int)args.Fun;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -338,8 +376,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data,

private SafeTrainingAndModelBuffer PrepareBuffer()
{
return new SafeTrainingAndModelBuffer(Host, _k, Math.Max(20, 2 * _threads),
_threads, _iter, _lambda, _eta, _doNmf, _quiet, copyData: false);
return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
_iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
}

/// <summary>
Expand Down
141 changes: 127 additions & 14 deletions src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,38 +44,144 @@ private unsafe struct MFProblem
[StructLayout(LayoutKind.Explicit)]
private struct MFParameter
{
/// <summary>
/// Enum of loss functions which can be minimized.
/// 0: square loss for regression.
/// 1: absolute loss for regression.
/// 2: KL-divergence for regression.
/// 5: logistic loss for binary classification.
/// 6: squared hinge loss for binary classification.
/// 7: hinge loss for binary classification.
/// 10: row-wise Bayesian personalized ranking.
/// 11: column-wise Bayesian personalized ranking.
/// 12: squared loss for implicit-feedback matrix factorization.
/// Fun 12 is solved by a coordinate descent method while other functions invoke
/// a stochastic gradient method.
/// </summary>
[FieldOffset(0)]
public int K;
public int Fun;

/// <summary>
/// Rank of factor matrices.
/// </summary>
[FieldOffset(4)]
public int NrThreads;
public int K;

/// <summary>
/// Number of threads which can be used for training.
/// </summary>
[FieldOffset(8)]
public int NrBins;
public int NrThreads;

/// <summary>
/// Number of blocks that the training matrix is divided into. The parallel stochastic gradient
/// method in LIBMF assigns each thread one block at a time. The ratings in one block
/// would be sequentially accessed (not randomly accessed like standard stochastic gradient methods).
/// </summary>
[FieldOffset(12)]
public int NrIters;
public int NrBins;

/// <summary>
/// Number of training iterations. In one iteration, all values in the training matrix are roughly accessed once.
/// </summary>
[FieldOffset(16)]
public float Lambda;
public int NrIters;

/// <summary>
/// L1-norm regularization coefficient of left factor matrix.
/// </summary>
[FieldOffset(20)]
public float Eta;
public float LambdaP1;

/// <summary>
/// L2-norm regularization coefficient of left factor matrix.
/// </summary>
[FieldOffset(24)]
public int DoNmf;
public float LambdaP2;

/// <summary>
/// L1-norm regularization coefficient of right factor matrix.
/// </summary>
[FieldOffset(28)]
public int Quiet;
public float LambdaQ1;

/// <summary>
/// L2-norm regularization coefficient of right factor matrix.
/// </summary>
[FieldOffset(32)]
public float LambdaQ2;

/// <summary>
/// Learning rate of LIBMF's stochastic gradient method.
/// </summary>
[FieldOffset(36)]
public float Eta;

/// <summary>
/// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12.
/// </summary>
[FieldOffset(40)]
public float Alpha;

/// <summary>
/// Desired value of unobserved entries in the training matrix. It's used only with fun=12.
/// </summary>
[FieldOffset(44)]
public float C;

/// <summary>
/// Specify if the factor matrices should be non-negative.
/// </summary>
[FieldOffset(48)]
public int DoNmf;

/// <summary>
/// Set to true so that LIBMF may produce less information to STDOUT.
/// </summary>
[FieldOffset(52)]
public int Quiet;

/// <summary>
/// Set to false so that LIBMF may reuse and modify the data passed in.
/// </summary>
[FieldOffset(56)]
public int CopyData;
}

[StructLayout(LayoutKind.Explicit)]
private unsafe struct MFModel
{
[FieldOffset(0)]
public int M;
public int Fun;
/// <summary>
/// Number of rows in the training matrix.
/// </summary>
[FieldOffset(4)]
public int N;
public int M;
/// <summary>
/// Number of columns in the training matrix.
/// </summary>
[FieldOffset(8)]
public int N;
/// <summary>
/// Rank of factor matrices.
/// </summary>
[FieldOffset(12)]
public int K;
/// <summary>
/// Average value in the training matrix.
/// </summary>
[FieldOffset(16)]
public float B;
/// <summary>
/// Left factor matrix. Its shape is M-by-K stored in row-major format.
/// </summary>
[FieldOffset(24)] // pointer is 8-byte on 64-bit machine.
public float* P;
[FieldOffset(24)]
/// <summary>
/// Right factor matrix. Its shape is N-by-K stored in row-major format.
/// </summary>
[FieldOffset(32)] // pointer is 8-byte on 64-bit machine.
public float* Q;
}

Expand All @@ -100,16 +206,23 @@ private unsafe struct MFModel
private unsafe MFModel* _pMFModel;
private readonly IHost _host;

public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta,
public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads,
int nrBins, int nrIters, double lambda, double eta, double alpha, double c,
bool doNmf, bool quiet, bool copyData)
{
_host = env.Register("SafeTrainingAndModelBuffer");
_mfParam.Fun = fun;
_mfParam.K = k;
_mfParam.NrBins = nrBins;
_mfParam.NrThreads = nrThreads;
_mfParam.NrBins = nrBins;
_mfParam.NrIters = nrIters;
_mfParam.Lambda = (float)lambda;
_mfParam.LambdaP1 = 0;
_mfParam.LambdaP2 = (float)lambda;
_mfParam.LambdaQ1 = 0;
_mfParam.LambdaQ2 = (float)lambda;
_mfParam.Eta = (float)eta;
_mfParam.Alpha = (float)alpha;
_mfParam.C = (float)c;
_mfParam.DoNmf = doNmf ? 1 : 0;
_mfParam.Quiet = quiet ? 1 : 0;
_mfParam.CopyData = copyData ? 1 : 0;
Expand Down
107 changes: 107 additions & 0 deletions test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,112 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex()
// The presence of out-of-range indexes may lead to NaN
Assert.True(float.IsNaN(pred.Score));
}

// The following ingredients are used to define a 3-by-2 one-class
// matrix used in a test, OneClassMatrixFactorizationInMemoryDataZeroBaseIndex,
// for one-class matrix factorization. One-class matrix means that all
// the available elements in the training matrix are 1. Such a matrix
// is common. Let's use an online game store as an example. Assume that
// user IDs are row indexes and game IDs are column indexes. By
// encoding all users' purchase history as a matrix (i.e., if the value
// at the u-th row and the v-th column is 1, then the u-th user owns
// the v-th game), a one-class matrix gets created because all matrix
// elements are 1. If you train a prediction model from that matrix
// using standard collaborative filtering, all your predictions would
// be 1! One-class matrix factorization assumes unspecified matrix
// entries are all 0 (or a small constant value selected by the user)
// so that the trained model can assign purchased items higher
// scores than those not purchased.
private const int _oneClassMatrixColumnCount = 2;
private const int _oneClassMatrixRowCount = 3;

// One observed entry of the one-class training matrix: a zero-based
// (column, row) coordinate pair plus the value stored at that position.
private class OneClassMatrixElementZeroBased
{
// Zero-based column index, declared as a contiguous key type with
// _oneClassMatrixColumnCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)]
public uint MatrixColumnIndex;
// Zero-based row index, declared as a contiguous key type with
// _oneClassMatrixRowCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)]
public uint MatrixRowIndex;
// Matrix value at this (row, column) position; the test feeds this field
// to the trainer as the label column.
public float Value;
}

// Scoring-time counterpart of OneClassMatrixElementZeroBased: identical
// coordinate/value fields plus a Score field that receives the model's
// predicted value for the (row, column) position.
private class OneClassMatrixElementZeroBasedForScore
{
// Zero-based column index, declared as a contiguous key type with
// _oneClassMatrixColumnCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)]
public uint MatrixColumnIndex;
// Zero-based row index, declared as a contiguous key type with
// _oneClassMatrixRowCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)]
public uint MatrixRowIndex;
// Matrix value at this position; ignored by the prediction engine, which
// writes its output to Score instead.
public float Value;
// Receives the model's prediction when the transformer is applied.
public float Score;
}

[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex()
{
// Build the observed entries of a 3-by-2 one-class matrix as a list of
// (column index, row index, value) tuples. In a one-class problem every
// unspecified position implicitly takes the constant chosen by the user;
// with that constant set to 0.15 below, the effective training matrix is:
// (0, 0, 1), (1, 1, 1), (0, 2, 1), (0, 1, 0.15), (1, 0, 0.15), (1, 2, 0.15).
// because matrix elements at (0, 1), (1, 0), and (1, 2) are not specified.
var trainingMatrix = new List<OneClassMatrixElementZeroBased>()
{
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 1 },
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 1, MatrixRowIndex = 1, Value = 1 },
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 2, Value = 1 }
};

// Wrap the in-memory list as an IDataView so ML.NET components can consume it.
var trainingView = ComponentCreation.CreateDataView(Env, trainingMatrix);

// Build a matrix factorization trainer that reads "Value" as the training label,
// "MatrixColumnIndex" as the matrix's column index, and "MatrixRowIndex" as the
// matrix's row index.
var mlContext = new MLContext(seed: 1, conc: 1);
var estimator = new MatrixFactorizationTrainer(mlContext,
nameof(OneClassMatrixElementZeroBased.MatrixColumnIndex),
nameof(OneClassMatrixElementZeroBased.MatrixRowIndex),
nameof(OneClassMatrixElementZeroBased.Value),
advancedSettings: s =>
{
s.Fun = MatrixFactorizationTrainer.LibMFLossFunctionType.SquareLossOneClass;
s.NumIterations = 100;
s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
// Let's test non-default regularization coefficient.
s.Lambda = 0.025;
s.K = 16;
// Importance coefficient of loss function over matrix elements not specified in the input matrix.
s.Alpha = 0.01;
// Desired value for matrix elements not specified in the input matrix.
s.C = 0.15;
});

// Fit the factorization model on the training matrix.
var trainedModel = estimator.Fit(trainingView);

// Score the training set with the model we just trained.
var trainingPrediction = trainedModel.Transform(trainingView);

// Evaluate the predictions with regression metrics.
var metrics = mlContext.Regression.Evaluate(trainingPrediction, label: "Value", score: "Score");

// The fit should be tight; reject large squared error.
Assert.InRange(metrics.L2, 0, 0.0016);

// Build test data. The 2nd element was never specified during training, so its
// prediction should land near the constant set via s.C = 0.15. Compared with the
// training-time data structure, OneClassMatrixElementZeroBasedForScore carries one
// extra float for the prediction result; the prediction engine may ignore Value
// and write the predicted value into Score.
var testMatrix = new List<OneClassMatrixElementZeroBasedForScore>()
{
new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 0, Score = 0 },
new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 2, Value = 0, Score = 0 }
};

// Wrap the in-memory test list as an IDataView.
var testView = ComponentCreation.CreateDataView(Env, testMatrix);

// Score the test data with the trained model.
var testPrediction = trainedModel.Transform(testView);

var testResults = new List<OneClassMatrixElementZeroBasedForScore>(testPrediction.AsEnumerable<OneClassMatrixElementZeroBasedForScore>(mlContext, false));
// Positive example (i.e., examples can be found in dataMatrix) is close to 1.
CompareNumbersWithTolerance(0.982391, testResults[0].Score, digitsOfPrecision: 5);
// Negative example (i.e., examples can not be found in dataMatrix) is close to 0.15 (specified by s.C = 0.15 in the trainer).
CompareNumbersWithTolerance(0.141411, testResults[1].Score, digitsOfPrecision: 5);
}
}
}