Skip to content

Commit df1c2af

Browse files
authored
Enable implicit-feedback recommendation via one-class matrix factorization (#1664)
Sync with the latest LIBMF to enable implicit-feedback recommendation
1 parent 1a9e7aa commit df1c2af

File tree

4 files changed

+285
-17
lines changed

4 files changed

+285
-17
lines changed

src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs

+50-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,19 @@ namespace Microsoft.ML.Trainers
8989
public sealed class MatrixFactorizationTrainer : TrainerBase<MatrixFactorizationPredictor>,
9090
IEstimator<MatrixFactorizationPredictionTransformer>
9191
{
92+
public enum LossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 };
93+
9294
public sealed class Arguments
9395
{
96+
/// <summary>
97+
/// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The value 0 means traditional collaborative filtering
98+
/// problem with squared loss. The value 12 triggers one-class matrix factorization for implicit-feedback recommendation problem.
99+
/// </summary>
100+
[Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
101+
[TGUI(SuggestedSweeps = "0,12")]
102+
[TlcModule.SweepableDiscreteParam("LossFunction", new object[] { LossFunctionType.SquareLossRegression, LossFunctionType.SquareLossOneClass })]
103+
public LossFunctionType LossFunction = LossFunctionType.SquareLossRegression;
104+
94105
[Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
95106
"It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " +
96107
"A small value could cause over-fitting.")]
@@ -116,6 +127,33 @@ public sealed class Arguments
116127
[TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
117128
public double Eta = 0.1;
118129

130+
/// <summary>
131+
/// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
132+
/// In general, only a few of matrix entries (e.g., less than 1%) in the training are observed (i.e., positive).
133+
/// To balance the contributions from unobserved and observed entries in the overall loss function, this parameter is
134+
/// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed
135+
/// entries. If only 10000 observed entries are present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000 - 10000).
136+
/// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous
137+
/// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently,
138+
/// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important
139+
/// in the minimized loss function. However, the best setting in machine learning is always data-dependent, so the user still needs to
140+
/// try multiple values.
141+
/// </summary>
142+
[Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
143+
[TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
144+
[TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})]
145+
public double Alpha = 0.0001;
146+
147+
/// <summary>
148+
/// Desired negative entries value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one
149+
/// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary
150+
/// classification) need to be specified manually using this option.
151+
/// </summary>
152+
[Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
153+
[TGUI(SuggestedSweeps = "0.000001,0.0001,0.01")]
154+
[TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
155+
public double C = 0.000001f;
156+
119157
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")]
120158
public int? NumThreads;
121159

@@ -131,10 +169,13 @@ public sealed class Arguments
131169
+ "and the values of the matrix are ratings. ";
132170

133171
// LIBMF's parameter
172+
private readonly int _fun;
134173
private readonly double _lambda;
135174
private readonly int _k;
136175
private readonly int _iter;
137176
private readonly double _eta;
177+
private readonly double _alpha;
178+
private readonly double _c;
138179
private readonly int _threads;
139180
private readonly bool _quiet;
140181
private readonly bool _doNmf;
@@ -192,11 +233,15 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e
192233
Host.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), posError);
193234
Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError);
194235
Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError);
236+
Host.CheckUserArg(args.Alpha > 0, nameof(args.Alpha), posError);
195237

238+
_fun = (int)args.LossFunction;
196239
_lambda = args.Lambda;
197240
_k = args.K;
198241
_iter = args.NumIterations;
199242
_eta = args.Eta;
243+
_alpha = args.Alpha;
244+
_c = args.C;
200245
_threads = args.NumThreads ?? Environment.ProcessorCount;
201246
_quiet = args.Quiet;
202247
_doNmf = args.NonNegative;
@@ -224,10 +269,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env,
224269
var args = new Arguments();
225270
advancedSettings?.Invoke(args);
226271

272+
_fun = (int)args.LossFunction;
227273
_lambda = args.Lambda;
228274
_k = args.K;
229275
_iter = args.NumIterations;
230276
_eta = args.Eta;
277+
_alpha = args.Alpha;
278+
_c = args.C;
231279
_threads = args.NumThreads ?? Environment.ProcessorCount;
232280
_quiet = args.Quiet;
233281
_doNmf = args.NonNegative;
@@ -338,8 +386,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data,
338386

339387
private SafeTrainingAndModelBuffer PrepareBuffer()
340388
{
341-
return new SafeTrainingAndModelBuffer(Host, _k, Math.Max(20, 2 * _threads),
342-
_threads, _iter, _lambda, _eta, _doNmf, _quiet, copyData: false);
389+
return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
390+
_iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
343391
}
344392

345393
/// <summary>

src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs

+127-14
Original file line numberDiff line numberDiff line change
@@ -44,38 +44,144 @@ private unsafe struct MFProblem
4444
[StructLayout(LayoutKind.Explicit)]
4545
private struct MFParameter
4646
{
47+
/// <summary>
48+
/// Enum of loss functions which can be minimized.
49+
/// 0: square loss for regression.
50+
/// 1: absolute loss for regression.
51+
/// 2: KL-divergence for regression.
52+
/// 5: logistic loss for binary classification.
53+
/// 6: squared hinge loss for binary classification.
54+
/// 7: hinge loss for binary classification.
55+
/// 10: row-wise Bayesian personalized ranking.
56+
/// 11: column-wise Bayesian personalized ranking.
57+
/// 12: squared loss for implicit-feedback matrix factorization.
58+
/// Fun 12 is solved by a coordinate descent method while other functions invoke
59+
/// a stochastic gradient method.
60+
/// </summary>
4761
[FieldOffset(0)]
48-
public int K;
62+
public int Fun;
63+
64+
/// <summary>
65+
/// Rank of factor matrices.
66+
/// </summary>
4967
[FieldOffset(4)]
50-
public int NrThreads;
68+
public int K;
69+
70+
/// <summary>
71+
/// Number of threads which can be used for training.
72+
/// </summary>
5173
[FieldOffset(8)]
52-
public int NrBins;
74+
public int NrThreads;
75+
76+
/// <summary>
77+
/// Number of blocks that the training matrix is divided into. The parallel stochastic gradient
78+
/// method in LIBMF assigns each thread one block at a time. The ratings in one block
79+
/// would be sequentially accessed (not randomly accessed like in standard stochastic gradient methods).
80+
/// </summary>
5381
[FieldOffset(12)]
54-
public int NrIters;
82+
public int NrBins;
83+
84+
/// <summary>
85+
/// Number of training iterations. In one iteration, all values in the training matrix are roughly accessed once.
86+
/// </summary>
5587
[FieldOffset(16)]
56-
public float Lambda;
88+
public int NrIters;
89+
90+
/// <summary>
91+
/// L1-norm regularization coefficient of left factor matrix.
92+
/// </summary>
5793
[FieldOffset(20)]
58-
public float Eta;
94+
public float LambdaP1;
95+
96+
/// <summary>
97+
/// L2-norm regularization coefficient of left factor matrix.
98+
/// </summary>
5999
[FieldOffset(24)]
60-
public int DoNmf;
100+
public float LambdaP2;
101+
102+
/// <summary>
103+
/// L1-norm regularization coefficient of right factor matrix.
104+
/// </summary>
61105
[FieldOffset(28)]
62-
public int Quiet;
106+
public float LambdaQ1;
107+
108+
/// <summary>
109+
/// L2-norm regularization coefficient of right factor matrix.
110+
/// </summary>
63111
[FieldOffset(32)]
112+
public float LambdaQ2;
113+
114+
/// <summary>
115+
/// Learning rate of LIBMF's stochastic gradient method.
116+
/// </summary>
117+
[FieldOffset(36)]
118+
public float Eta;
119+
120+
/// <summary>
121+
/// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12.
122+
/// </summary>
123+
[FieldOffset(40)]
124+
public float Alpha;
125+
126+
/// <summary>
127+
/// Desired value of unobserved entries in the training matrix. It's used only with fun=12.
128+
/// </summary>
129+
[FieldOffset(44)]
130+
public float C;
131+
132+
/// <summary>
133+
/// Specify if the factor matrices should be non-negative.
134+
/// </summary>
135+
[FieldOffset(48)]
136+
public int DoNmf;
137+
138+
/// <summary>
139+
/// Set to true so that LIBMF prints less information to STDOUT.
140+
/// </summary>
141+
[FieldOffset(52)]
142+
public int Quiet;
143+
144+
/// <summary>
145+
/// Set to false so that LIBMF may reuse and modify the data passed in.
146+
/// </summary>
147+
[FieldOffset(56)]
64148
public int CopyData;
65149
}
66150

67151
[StructLayout(LayoutKind.Explicit)]
68152
private unsafe struct MFModel
69153
{
70154
[FieldOffset(0)]
71-
public int M;
155+
public int Fun;
156+
/// <summary>
157+
/// Number of rows in the training matrix.
158+
/// </summary>
72159
[FieldOffset(4)]
73-
public int N;
160+
public int M;
161+
/// <summary>
162+
/// Number of columns in the training matrix.
163+
/// </summary>
74164
[FieldOffset(8)]
165+
public int N;
166+
/// <summary>
167+
/// Rank of factor matrices.
168+
/// </summary>
169+
[FieldOffset(12)]
75170
public int K;
171+
/// <summary>
172+
/// Average value in the training matrix.
173+
/// </summary>
76174
[FieldOffset(16)]
175+
public float B;
176+
/// <summary>
177+
/// Left factor matrix. Its shape is M-by-K stored in row-major format.
178+
/// </summary>
179+
[FieldOffset(24)] // pointer is 8-byte on 64-bit machine.
77180
public float* P;
78-
[FieldOffset(24)]
181+
/// <summary>
182+
/// Right factor matrix. Its shape is N-by-K stored in row-major format.
183+
/// </summary>
184+
[FieldOffset(32)] // pointer is 8-byte on 64-bit machine.
79185
public float* Q;
80186
}
81187

@@ -100,16 +206,23 @@ private unsafe struct MFModel
100206
private unsafe MFModel* _pMFModel;
101207
private readonly IHost _host;
102208

103-
public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta,
209+
public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads,
210+
int nrBins, int nrIters, double lambda, double eta, double alpha, double c,
104211
bool doNmf, bool quiet, bool copyData)
105212
{
106213
_host = env.Register("SafeTrainingAndModelBuffer");
214+
_mfParam.Fun = fun;
107215
_mfParam.K = k;
108-
_mfParam.NrBins = nrBins;
109216
_mfParam.NrThreads = nrThreads;
217+
_mfParam.NrBins = nrBins;
110218
_mfParam.NrIters = nrIters;
111-
_mfParam.Lambda = (float)lambda;
219+
_mfParam.LambdaP1 = 0;
220+
_mfParam.LambdaP2 = (float)lambda;
221+
_mfParam.LambdaQ1 = 0;
222+
_mfParam.LambdaQ2 = (float)lambda;
112223
_mfParam.Eta = (float)eta;
224+
_mfParam.Alpha = (float)alpha;
225+
_mfParam.C = (float)c;
113226
_mfParam.DoNmf = doNmf ? 1 : 0;
114227
_mfParam.Quiet = quiet ? 1 : 0;
115228
_mfParam.CopyData = copyData ? 1 : 0;

0 commit comments

Comments
 (0)