Skip to content

Enable implicit-feedback recommendation via one-class matrix factorization #1664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,19 @@ namespace Microsoft.ML.Trainers
public sealed class MatrixFactorizationTrainer : TrainerBase<MatrixFactorizationPredictor>,
IEstimator<MatrixFactorizationPredictionTransformer>
{
/// <summary>
/// Loss function minimized when finding the factor matrices. SquareLossRegression (0) is the
/// traditional collaborative-filtering problem with squared loss; SquareLossOneClass (12)
/// triggers one-class matrix factorization for implicit-feedback recommendation.
/// The numeric values match LIBMF's "fun" parameter — presumably why 12 is used; confirm against LIBMF docs.
/// </summary>
public enum LossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 };

public sealed class Arguments
{
/// <summary>
/// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The value 0 means the traditional collaborative filtering
/// problem with squared loss. The value 12 triggers one-class matrix factorization for the implicit-feedback recommendation problem.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
[TGUI(SuggestedSweeps = "0,12")]
[TlcModule.SweepableDiscreteParam("LossFunction", new object[] { LossFunctionType.SquareLossRegression, LossFunctionType.SquareLossOneClass })]
public LossFunctionType LossFunction = LossFunctionType.SquareLossRegression;

[Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
"It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " +
"A small value could cause over-fitting.")]
Expand All @@ -116,6 +127,33 @@ public sealed class Arguments
[TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
public double Eta = 0.1;

/// <summary>
/// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
/// In general, only a small fraction of the matrix entries (e.g., less than 1%) in the training matrix are observed (i.e., positive).
/// To balance the contributions from unobserved and observed entries in the overall loss function, this parameter is
/// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed
/// entries. If only 10000 observed entries present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000 - 10000).
/// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous
/// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently,
/// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important
/// in the minimized loss function. However, the best setting in machine learning is always data-dependent, so the user still needs to
/// try multiple values.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
[TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
[TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})]
public double Alpha = 0.0001;

/// <summary>
/// Desired negative entries value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one
/// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary
/// classification) need to be specified manually using this option.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
[TGUI(SuggestedSweeps = "0.000001,0,0001,0.01")]
[TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
public double C = 0.000001f;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")]
public int? NumThreads;

Expand All @@ -131,10 +169,13 @@ public sealed class Arguments
+ "and the values of the matrix are ratings. ";

// LIBMF's parameter
private readonly int _fun;
private readonly double _lambda;
private readonly int _k;
private readonly int _iter;
private readonly double _eta;
private readonly double _alpha;
private readonly double _c;
private readonly int _threads;
private readonly bool _quiet;
private readonly bool _doNmf;
Expand Down Expand Up @@ -192,11 +233,15 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e
Host.CheckUserArg(args.NumIterations > 0, nameof(args.NumIterations), posError);
Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError);
Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError);
Host.CheckUserArg(args.Alpha > 0, nameof(args.Alpha), posError);

_fun = (int)args.LossFunction;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -224,10 +269,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env,
var args = new Arguments();
advancedSettings?.Invoke(args);

_fun = (int)args.LossFunction;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -338,8 +386,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data,

/// <summary>
/// Creates the native-interop buffer configured with this trainer's LIBMF parameters:
/// loss function, rank, thread count, block count, iterations, regularization,
/// learning rate, and the one-class parameters alpha and c.
/// </summary>
private SafeTrainingAndModelBuffer PrepareBuffer()
{
    // Math.Max(20, 2 * _threads) picks the number of blocks (bins) the training matrix
    // is split into — at least twice the thread count so every thread has work.
    // copyData: false lets LIBMF reuse the buffers we hand it instead of cloning them.
    return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
        _iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
}

/// <summary>
Expand Down
141 changes: 127 additions & 14 deletions src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,38 +44,144 @@ private unsafe struct MFProblem
/// <summary>
/// Managed mirror of LIBMF's native parameter structure. LayoutKind.Explicit with
/// per-field offsets keeps the byte layout identical to the native struct
/// (assumed to match the LIBMF version this wraps — confirm when upgrading LIBMF).
/// </summary>
[StructLayout(LayoutKind.Explicit)]
private struct MFParameter
{
    /// <summary>
    /// Enum of loss functions which can be minimized.
    /// 0: squared loss for regression.
    /// 1: absolute loss for regression.
    /// 2: KL-divergence for regression.
    /// 5: logistic loss for binary classification.
    /// 6: squared hinge loss for binary classification.
    /// 7: hinge loss for binary classification.
    /// 10: row-wise Bayesian personalized ranking.
    /// 11: column-wise Bayesian personalized ranking.
    /// 12: squared loss for implicit-feedback matrix factorization.
    /// Fun 12 is solved by a coordinate descent method while the other functions invoke
    /// a stochastic gradient method.
    /// </summary>
    [FieldOffset(0)]
    public int Fun;

    /// <summary>
    /// Rank of the factor matrices.
    /// </summary>
    [FieldOffset(4)]
    public int K;

    /// <summary>
    /// Number of threads which can be used in the training procedure.
    /// </summary>
    [FieldOffset(8)]
    public int NrThreads;

    /// <summary>
    /// Number of blocks the training matrix is divided into. LIBMF's parallel stochastic
    /// gradient method assigns each thread one block at a time; the ratings within a block
    /// are accessed sequentially (not randomly as in standard stochastic gradient methods).
    /// </summary>
    [FieldOffset(12)]
    public int NrBins;

    /// <summary>
    /// Number of training iterations. In one iteration, all values in the training matrix
    /// are roughly accessed once.
    /// </summary>
    [FieldOffset(16)]
    public int NrIters;

    /// <summary>
    /// L1-norm regularization coefficient of the left factor matrix.
    /// </summary>
    [FieldOffset(20)]
    public float LambdaP1;

    /// <summary>
    /// L2-norm regularization coefficient of the left factor matrix.
    /// </summary>
    [FieldOffset(24)]
    public float LambdaP2;

    /// <summary>
    /// L1-norm regularization coefficient of the right factor matrix.
    /// </summary>
    [FieldOffset(28)]
    public float LambdaQ1;

    /// <summary>
    /// L2-norm regularization coefficient of the right factor matrix.
    /// </summary>
    [FieldOffset(32)]
    public float LambdaQ2;

    /// <summary>
    /// Learning rate of LIBMF's stochastic gradient method.
    /// </summary>
    [FieldOffset(36)]
    public float Eta;

    /// <summary>
    /// Coefficient of the loss on unobserved entries in the training matrix.
    /// Used only with Fun = 12 (one-class matrix factorization).
    /// </summary>
    [FieldOffset(40)]
    public float Alpha;

    /// <summary>
    /// Desired value of unobserved entries in the training matrix.
    /// Used only with Fun = 12 (one-class matrix factorization).
    /// </summary>
    [FieldOffset(44)]
    public float C;

    /// <summary>
    /// Non-zero if the factor matrices should be non-negative.
    /// </summary>
    [FieldOffset(48)]
    public int DoNmf;

    /// <summary>
    /// Non-zero to make LIBMF produce less information on STDOUT.
    /// </summary>
    [FieldOffset(52)]
    public int Quiet;

    /// <summary>
    /// Zero to let LIBMF reuse and modify the data passed in; non-zero to have it copy.
    /// </summary>
    [FieldOffset(56)]
    public int CopyData;
}

/// <summary>
/// Managed mirror of LIBMF's native model structure. LayoutKind.Explicit with
/// per-field offsets keeps the byte layout identical to the native struct.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
private unsafe struct MFModel
{
    /// <summary>
    /// Loss function minimized when training this model. See MFParameter for the
    /// meaning of the numeric values.
    /// </summary>
    [FieldOffset(0)]
    public int Fun;

    /// <summary>
    /// Number of rows in the training matrix.
    /// </summary>
    [FieldOffset(4)]
    public int M;

    /// <summary>
    /// Number of columns in the training matrix.
    /// </summary>
    [FieldOffset(8)]
    public int N;

    /// <summary>
    /// Rank of the factor matrices.
    /// </summary>
    [FieldOffset(12)]
    public int K;

    /// <summary>
    /// Average value in the training matrix.
    /// </summary>
    [FieldOffset(16)]
    public float B;

    /// <summary>
    /// Left factor matrix. Its shape is M-by-K, stored in row-major format.
    /// Offset skips to 24 because the pointer is 8-byte aligned on 64-bit machines.
    /// </summary>
    [FieldOffset(24)]
    public float* P;

    /// <summary>
    /// Right factor matrix. Its shape is N-by-K, stored in row-major format.
    /// </summary>
    [FieldOffset(32)]
    public float* Q;
}

Expand All @@ -100,16 +206,23 @@ private unsafe struct MFModel
private unsafe MFModel* _pMFModel;
private readonly IHost _host;

public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta,
public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads,
int nrBins, int nrIters, double lambda, double eta, double alpha, double c,
bool doNmf, bool quiet, bool copyData)
{
_host = env.Register("SafeTrainingAndModelBuffer");
_mfParam.Fun = fun;
_mfParam.K = k;
_mfParam.NrBins = nrBins;
_mfParam.NrThreads = nrThreads;
_mfParam.NrBins = nrBins;
_mfParam.NrIters = nrIters;
_mfParam.Lambda = (float)lambda;
_mfParam.LambdaP1 = 0;
_mfParam.LambdaP2 = (float)lambda;
_mfParam.LambdaQ1 = 0;
_mfParam.LambdaQ2 = (float)lambda;
_mfParam.Eta = (float)eta;
_mfParam.Alpha = (float)alpha;
_mfParam.C = (float)c;
_mfParam.DoNmf = doNmf ? 1 : 0;
_mfParam.Quiet = quiet ? 1 : 0;
_mfParam.CopyData = copyData ? 1 : 0;
Expand Down
Loading