Skip to content

Commit df1c2af

Browse files
authored
Enable implicit-feedback recommendation via one-class matrix factorization (#1664)
Sync with the latest LIBMF to enable implicit-feedback recommendation
1 parent 1a9e7aa commit df1c2af

File tree

4 files changed

+285
-17
lines changed

4 files changed

+285
-17
lines changed

src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs

+50-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,19 @@ namespace Microsoft.ML.Trainers
8989
public sealed class MatrixFactorizationTrainer : TrainerBase<MatrixFactorizationPredictor>,
9090
IEstimator<MatrixFactorizationPredictionTransformer>
9191
{
92+
public enum LossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 };
93+
9294
public sealed class Arguments
9395
{
96+
/// <summary>
97+
/// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The value 0 means traditional collaborative filtering
98+
/// problem with squared loss. The value 12 triggers one-class matrix factorization for implicit-feedback recommendation problem.
99+
/// </summary>
100+
[Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
101+
[TGUI(SuggestedSweeps = "0,12")]
102+
[TlcModule.SweepableDiscreteParam("LossFunction", new object[] { LossFunctionType.SquareLossRegression, LossFunctionType.SquareLossOneClass })]
103+
public LossFunctionType LossFunction = LossFunctionType.SquareLossRegression;
104+
94105
[Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
95106
"It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " +
96107
"A small value could cause over-fitting.")]
@@ -116,6 +127,33 @@ public sealed class Arguments
116127
[TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
117128
public double Eta = 0.1;
118129

130+
/// <summary>
131+
/// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
132+
/// In general, only a few of matrix entries (e.g., less than 1%) in the training are observed (i.e., positive).
133+
/// To balance the contributions from unobserved and observed entries in the overall loss function, this parameter is
134+
/// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed
135+
/// entries. If only 10000 observed entries are present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000 - 10000).
136+
/// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous
137+
/// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently,
138+
/// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important
139+
/// in the minimized loss function. However, the best setting in machine learning is always data-dependent, so the user still needs to
140+
/// try multiple values.
141+
/// </summary>
142+
[Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
143+
[TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
144+
[TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})]
145+
public double Alpha = 0.0001;
146+
147+
/// <summary>
148+
/// Desired negative entries value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one
149+
/// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary
150+
/// classification) need to be specified manually using this option.
151+
/// </summary>
152+
[Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
153+
[TGUI(SuggestedSweeps = "0.000001,0.0001,0.01")]
154+
[TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
155+
public double C = 0.000001f;
156+
119157
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")]
120158
public int? NumThreads;
121159

@@ -131,10 +169,13 @@ public sealed class Arguments
131169
+ "and the values of the matrix are ratings. ";
132170

133171
// LIBMF's parameter
172+
private readonly int _fun;
134173
private readonly double _lambda;
135174
private readonly int _k;
136175
private readonly int _iter;
137176
private readonly double _eta;
177+
private readonly double _alpha;
178+
private readonly double _c;
138179
private readonly int _threads;
139180
private readonly bool _quiet;
140181
private readonly bool _doNmf;
@@ -192,11 +233,15 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e
192233
Host.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), posError);
193234
Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError);
194235
Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError);
236+
Host.CheckUserArg(args.Alpha > 0, nameof(args.Alpha), posError);
195237

238+
_fun = (int)args.LossFunction;
196239
_lambda = args.Lambda;
197240
_k = args.K;
198241
_iter = args.NumIterations;
199242
_eta = args.Eta;
243+
_alpha = args.Alpha;
244+
_c = args.C;
200245
_threads = args.NumThreads ?? Environment.ProcessorCount;
201246
_quiet = args.Quiet;
202247
_doNmf = args.NonNegative;
@@ -224,10 +269,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env,
224269
var args = new Arguments();
225270
advancedSettings?.Invoke(args);
226271

272+
_fun = (int)args.LossFunction;
227273
_lambda = args.Lambda;
228274
_k = args.K;
229275
_iter = args.NumIterations;
230276
_eta = args.Eta;
277+
_alpha = args.Alpha;
278+
_c = args.C;
231279
_threads = args.NumThreads ?? Environment.ProcessorCount;
232280
_quiet = args.Quiet;
233281
_doNmf = args.NonNegative;
@@ -338,8 +386,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data,
338386

339387
private SafeTrainingAndModelBuffer PrepareBuffer()
340388
{
341-
return new SafeTrainingAndModelBuffer(Host, _k, Math.Max(20, 2 * _threads),
342-
_threads, _iter, _lambda, _eta, _doNmf, _quiet, copyData: false);
389+
return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
390+
_iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
343391
}
344392

345393
/// <summary>

src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs

+127-14
Original file line numberDiff line numberDiff line change
@@ -44,38 +44,144 @@ private unsafe struct MFProblem
4444
[StructLayout(LayoutKind.Explicit)]
4545
private struct MFParameter
4646
{
47+
/// <summary>
48+
/// Enum of loss functions which can be minimized.
49+
/// 0: square loss for regression.
50+
/// 1: absolute loss for regression.
51+
/// 2: KL-divergence for regression.
52+
/// 5: logistic loss for binary classification.
53+
/// 6: squared hinge loss for binary classification.
54+
/// 7: hinge loss for binary classification.
55+
/// 10: row-wise Bayesian personalized ranking.
56+
/// 11: column-wise Bayesian personalized ranking.
57+
/// 12: squared loss for implicit-feedback matrix factorization.
58+
/// Fun 12 is solved by a coordinate descent method while other functions invoke
59+
/// a stochastic gradient method.
60+
/// </summary>
4761
[FieldOffset(0)]
48-
public int K;
62+
public int Fun;
63+
64+
/// <summary>
65+
/// Rank of factor matrices.
66+
/// </summary>
4967
[FieldOffset(4)]
50-
public int NrThreads;
68+
public int K;
69+
70+
/// <summary>
71+
/// Number of threads which can be used for training.
72+
/// </summary>
5173
[FieldOffset(8)]
52-
public int NrBins;
74+
public int NrThreads;
75+
76+
/// <summary>
77+
/// Number of blocks that the training matrix is divided into. The parallel stochastic gradient
78+
/// method in LIBMF assigns each thread one block at a time. The ratings in one block
79+
/// would be sequentially accessed (not randomly accessed like in standard stochastic gradient methods).
80+
/// </summary>
5381
[FieldOffset(12)]
54-
public int NrIters;
82+
public int NrBins;
83+
84+
/// <summary>
85+
/// Number of training iterations. In one iteration, all values in the training matrix are roughly accessed once.
86+
/// </summary>
5587
[FieldOffset(16)]
56-
public float Lambda;
88+
public int NrIters;
89+
90+
/// <summary>
91+
/// L1-norm regularization coefficient of left factor matrix.
92+
/// </summary>
5793
[FieldOffset(20)]
58-
public float Eta;
94+
public float LambdaP1;
95+
96+
/// <summary>
97+
/// L2-norm regularization coefficient of left factor matrix.
98+
/// </summary>
5999
[FieldOffset(24)]
60-
public int DoNmf;
100+
public float LambdaP2;
101+
102+
/// <summary>
103+
/// L1-norm regularization coefficient of right factor matrix.
104+
/// </summary>
61105
[FieldOffset(28)]
62-
public int Quiet;
106+
public float LambdaQ1;
107+
108+
/// <summary>
109+
/// L2-norm regularization coefficient of right factor matrix.
110+
/// </summary>
63111
[FieldOffset(32)]
112+
public float LambdaQ2;
113+
114+
/// <summary>
115+
/// Learning rate of LIBMF's stochastic gradient method.
116+
/// </summary>
117+
[FieldOffset(36)]
118+
public float Eta;
119+
120+
/// <summary>
121+
/// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12.
122+
/// </summary>
123+
[FieldOffset(40)]
124+
public float Alpha;
125+
126+
/// <summary>
127+
/// Desired value of unobserved entries in the training matrix. It's used only with fun=12.
128+
/// </summary>
129+
[FieldOffset(44)]
130+
public float C;
131+
132+
/// <summary>
133+
/// Specify if the factor matrices should be non-negative.
134+
/// </summary>
135+
[FieldOffset(48)]
136+
public int DoNmf;
137+
138+
/// <summary>
139+
/// Set to true so that LIBMF prints less information to STDOUT.
140+
/// </summary>
141+
[FieldOffset(52)]
142+
public int Quiet;
143+
144+
/// <summary>
145+
/// Set to false so that LIBMF may reuse and modify the data passed in.
146+
/// </summary>
147+
[FieldOffset(56)]
64148
public int CopyData;
65149
}
66150

67151
[StructLayout(LayoutKind.Explicit)]
68152
private unsafe struct MFModel
69153
{
70154
[FieldOffset(0)]
71-
public int M;
155+
public int Fun;
156+
/// <summary>
157+
/// Number of rows in the training matrix.
158+
/// </summary>
72159
[FieldOffset(4)]
73-
public int N;
160+
public int M;
161+
/// <summary>
162+
/// Number of columns in the training matrix.
163+
/// </summary>
74164
[FieldOffset(8)]
165+
public int N;
166+
/// <summary>
167+
/// Rank of factor matrices.
168+
/// </summary>
169+
[FieldOffset(12)]
75170
public int K;
171+
/// <summary>
172+
/// Average value in the training matrix.
173+
/// </summary>
76174
[FieldOffset(16)]
175+
public float B;
176+
/// <summary>
177+
/// Left factor matrix. Its shape is M-by-K stored in row-major format.
178+
/// </summary>
179+
[FieldOffset(24)] // pointer is 8-byte on 64-bit machine.
77180
public float* P;
78-
[FieldOffset(24)]
181+
/// <summary>
182+
/// Right factor matrix. Its shape is N-by-K stored in row-major format.
183+
/// </summary>
184+
[FieldOffset(32)] // pointer is 8-byte on 64-bit machine.
79185
public float* Q;
80186
}
81187

@@ -100,16 +206,23 @@ private unsafe struct MFModel
100206
private unsafe MFModel* _pMFModel;
101207
private readonly IHost _host;
102208

103-
public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta,
209+
public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads,
210+
int nrBins, int nrIters, double lambda, double eta, double alpha, double c,
104211
bool doNmf, bool quiet, bool copyData)
105212
{
106213
_host = env.Register("SafeTrainingAndModelBuffer");
214+
_mfParam.Fun = fun;
107215
_mfParam.K = k;
108-
_mfParam.NrBins = nrBins;
109216
_mfParam.NrThreads = nrThreads;
217+
_mfParam.NrBins = nrBins;
110218
_mfParam.NrIters = nrIters;
111-
_mfParam.Lambda = (float)lambda;
219+
_mfParam.LambdaP1 = 0;
220+
_mfParam.LambdaP2 = (float)lambda;
221+
_mfParam.LambdaQ1 = 0;
222+
_mfParam.LambdaQ2 = (float)lambda;
112223
_mfParam.Eta = (float)eta;
224+
_mfParam.Alpha = (float)alpha;
225+
_mfParam.C = (float)c;
113226
_mfParam.DoNmf = doNmf ? 1 : 0;
114227
_mfParam.Quiet = quiet ? 1 : 0;
115228
_mfParam.CopyData = copyData ? 1 : 0;

0 commit comments

Comments
 (0)