Skip to content

Enable implicit-feedback recommendation via one-class matrix factorization #1664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 20, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,19 @@ namespace Microsoft.ML.Trainers
public sealed class MatrixFactorizationTrainer : TrainerBase<MatrixFactorizationPredictor>,
IEstimator<MatrixFactorizationPredictionTransformer>
{
public enum LibMFLossFunctionType { SquareLossRegression = 0, SquareLossOneClass = 12 };

public sealed class Arguments
{
/// <summary>
/// Loss function minimized for finding factor matrices. Two values are allowed, 0 or 12. The value 0 means traditional collaborative filtering
/// problem with squared loss. The value 12 triggers one-class matrix factorization for implicit-feedback recommendation problem.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
[TGUI(SuggestedSweeps = "0,12")]
[TlcModule.SweepableDiscreteParam("Fun", new object[] { LibMFLossFunctionType.SquareLossRegression, LibMFLossFunctionType.SquareLossOneClass })]
public LibMFLossFunctionType Fun = LibMFLossFunctionType.SquareLossRegression;

[Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
"It's the weight of factor matrices' norms in the objective function minimized by matrix factorization's algorithm. " +
"A small value could cause over-fitting.")]
Expand All @@ -116,6 +127,24 @@ public sealed class Arguments
[TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
public double Eta = 0.1;

/// <summary>
/// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
[TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
[TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f})]
public double Alpha = 0.1;

/// <summary>
/// Desired negative entries' value in one-class matrix factorization. In one-class matrix factorization, all matrix values observed are one
/// (which can be viewed as positive cases in binary classification) while unobserved values (which can be viewed as negative cases in binary
/// classification) need to be specified manually using this option.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
[TGUI(SuggestedSweeps = "0.000001,0,0001,0.01")]
[TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
public double C = 0.000001f;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t")]
public int? NumThreads;

Expand All @@ -131,10 +160,13 @@ public sealed class Arguments
+ "and the values of the matrix are ratings. ";

// LIBMF's parameter
private readonly int _fun;
private readonly double _lambda;
private readonly int _k;
private readonly int _iter;
private readonly double _eta;
private readonly double _alpha;
private readonly double _c;
private readonly int _threads;
private readonly bool _quiet;
private readonly bool _doNmf;
Expand Down Expand Up @@ -193,10 +225,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e
Host.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), posError);
Host.CheckUserArg(args.Eta > 0, nameof(args.Eta), posError);

_fun = (int)args.Fun;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -224,10 +259,13 @@ public MatrixFactorizationTrainer(IHostEnvironment env,
var args = new Arguments();
advancedSettings?.Invoke(args);

_fun = (int)args.Fun;
_lambda = args.Lambda;
_k = args.K;
_iter = args.NumIterations;
_eta = args.Eta;
_alpha = args.Alpha;
_c = args.C;
_threads = args.NumThreads ?? Environment.ProcessorCount;
_quiet = args.Quiet;
_doNmf = args.NonNegative;
Expand Down Expand Up @@ -338,8 +376,8 @@ private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data,

private SafeTrainingAndModelBuffer PrepareBuffer()
{
return new SafeTrainingAndModelBuffer(Host, _k, Math.Max(20, 2 * _threads),
_threads, _iter, _lambda, _eta, _doNmf, _quiet, copyData: false);
return new SafeTrainingAndModelBuffer(Host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
_iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
}

/// <summary>
Expand Down
141 changes: 127 additions & 14 deletions src/Microsoft.ML.Recommender/SafeTrainingAndModelBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,38 +44,144 @@ private unsafe struct MFProblem
[StructLayout(LayoutKind.Explicit)]
private struct MFParameter
{
/// <summary>
/// Enum of loss functions which can be minimized.
/// 0: square loss for regression.
/// 1: absolute loss for regression.
/// 2: KL-divergence for regression.
/// 5: logistic loss for binary classification.
/// 6: squared hinge loss for binary classification.
/// 7: hinge loss for binary classification.
/// 10: row-wise Bayesian personalized ranking.
/// 11: column-wise Bayesian personalized ranking.
/// 12: squared loss for implicit-feedback matrix factorization.
/// Fun 12 is solved by a coordinate descent method while other functions invoke
/// a stochastic gradient method.
/// </summary>
[FieldOffset(0)]
public int K;
public int Fun;

/// <summary>
/// Rank of factor matrices.
/// </summary>
[FieldOffset(4)]
public int NrThreads;
public int K;

/// <summary>
/// Number of threads which can be used for training.
/// </summary>
[FieldOffset(8)]
public int NrBins;
public int NrThreads;

/// <summary>
/// Number of blocks that the training matrix is divided into. The parallel stochastic gradient
/// method in LIBMF assigns each thread one block at a time. The ratings in one block
/// would be sequentially accessed (not randomly accessed like standard stochastic gradient methods).
/// </summary>
[FieldOffset(12)]
public int NrIters;
public int NrBins;

/// <summary>
/// Number of training iterations. In one iteration, all values in the training matrix are roughly accessed once.
/// </summary>
[FieldOffset(16)]
public float Lambda;
public int NrIters;

/// <summary>
/// L1-norm regularization coefficient of left factor matrix.
/// </summary>
[FieldOffset(20)]
public float Eta;
public float LambdaP1;

/// <summary>
/// L2-norm regularization coefficient of left factor matrix.
/// </summary>
[FieldOffset(24)]
public int DoNmf;
public float LambdaP2;

/// <summary>
/// L1-norm regularization coefficient of right factor matrix.
/// </summary>
[FieldOffset(28)]
public int Quiet;
public float LambdaQ1;

/// <summary>
/// L2-norm regularization coefficient of right factor matrix.
/// </summary>
[FieldOffset(32)]
public float LambdaQ2;

/// <summary>
/// Learning rate of LIBMF's stochastic gradient method.
/// </summary>
[FieldOffset(36)]
public float Eta;

/// <summary>
/// Coefficient of loss function on unobserved entries in the training matrix. It's used only with fun=12.
/// </summary>
[FieldOffset(40)]
public float Alpha;

/// <summary>
/// Desired value of unobserved entries in the training matrix. It's used only with fun=12.
/// </summary>
[FieldOffset(44)]
public float C;

/// <summary>
/// Specify if the factor matrices should be non-negative.
/// </summary>
[FieldOffset(48)]
public int DoNmf;

/// <summary>
/// Set to true so that LIBMF may produce less information to STDOUT.
/// </summary>
[FieldOffset(52)]
public int Quiet;

/// <summary>
/// Set to false so that LIBMF may reuse and modify the data passed in.
/// </summary>
[FieldOffset(56)]
public int CopyData;
}

[StructLayout(LayoutKind.Explicit)]
private unsafe struct MFModel
{
[FieldOffset(0)]
public int M;
public int Fun;
/// <summary>
/// Number of rows in the training matrix.
/// </summary>
[FieldOffset(4)]
public int N;
public int M;
/// <summary>
/// Number of columns in the training matrix.
/// </summary>
[FieldOffset(8)]
public int N;
/// <summary>
/// Rank of factor matrices.
/// </summary>
[FieldOffset(12)]
public int K;
/// <summary>
/// Average value in the training matrix.
/// </summary>
[FieldOffset(16)]
public float B;
/// <summary>
/// Left factor matrix. Its shape is M-by-K stored in row-major format.
/// </summary>
[FieldOffset(24)] // pointer is 8-byte on 64-bit machine.
public float* P;
[FieldOffset(24)]
/// <summary>
/// Right factor matrix. Its shape is N-by-K stored in row-major format.
/// </summary>
[FieldOffset(32)] // pointer is 8-byte on 64-bit machine.
public float* Q;
}

Expand All @@ -100,16 +206,23 @@ private unsafe struct MFModel
private unsafe MFModel* _pMFModel;
private readonly IHost _host;

public SafeTrainingAndModelBuffer(IHostEnvironment env, int k, int nrBins, int nrThreads, int nrIters, double lambda, double eta,
public SafeTrainingAndModelBuffer(IHostEnvironment env, int fun, int k, int nrThreads,
int nrBins, int nrIters, double lambda, double eta, double alpha, double c,
bool doNmf, bool quiet, bool copyData)
{
_host = env.Register("SafeTrainingAndModelBuffer");
_mfParam.Fun = fun;
_mfParam.K = k;
_mfParam.NrBins = nrBins;
_mfParam.NrThreads = nrThreads;
_mfParam.NrBins = nrBins;
_mfParam.NrIters = nrIters;
_mfParam.Lambda = (float)lambda;
_mfParam.LambdaP1 = 0;
_mfParam.LambdaP2 = (float)lambda;
_mfParam.LambdaQ1 = 0;
_mfParam.LambdaQ2 = (float)lambda;
_mfParam.Eta = (float)eta;
_mfParam.Alpha = (float)alpha;
_mfParam.C = (float)c;
_mfParam.DoNmf = doNmf ? 1 : 0;
_mfParam.Quiet = quiet ? 1 : 0;
_mfParam.CopyData = copyData ? 1 : 0;
Expand Down
107 changes: 107 additions & 0 deletions test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,112 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex()
// The presence of out-of-range indexes may lead to NaN
Assert.True(float.IsNaN(pred.Score));
}

// The following ingredients are used to define a 3-by-2 one-class
// matrix used in a test, OneClassMatrixFactorizationInMemoryDataZeroBaseIndex,
// for one-class matrix factorization. One-class matrix means that all
// the available elements in the training matrix are 1. Such a matrix
// is common. Let's use an online game store as an example. Assume that
// user IDs are row indexes and game IDs are column indexes. By
// encoding all users' purchase history as a matrix (i.e., if the value
// at the u-th row and the v-th column is 1, then the u-th user owns
// the v-th game), a one-class matrix gets created because all matrix
// elements are 1. If you train a prediction model from that matrix
// using standard collaborative filtering, all your predictions would
// be 1! One-class matrix factorization assumes unspecified matrix
// entries are all 0 (or a small constant value selected by the user)
// so that the trained model can assign purchased items higher
// scores than those not purchased.
private const int _oneClassMatrixColumnCount = 2;
private const int _oneClassMatrixRowCount = 3;

// One observed entry of the one-class training matrix: a zero-based
// (column, row) coordinate pair plus the value stored at that position.
private class OneClassMatrixElementZeroBased
{
// Zero-based column index, declared as a contiguous key type with
// _oneClassMatrixColumnCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)]
public uint MatrixColumnIndex;
// Zero-based row index, declared as a contiguous key type with
// _oneClassMatrixRowCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)]
public uint MatrixRowIndex;
// Matrix value at this (row, column) position; the test feeds this field
// to the trainer as the label column.
public float Value;
}

// Scoring-time counterpart of OneClassMatrixElementZeroBased: identical
// coordinate/value fields plus a Score field that receives the model's
// predicted value for the (row, column) position.
private class OneClassMatrixElementZeroBasedForScore
{
// Zero-based column index, declared as a contiguous key type with
// _oneClassMatrixColumnCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixColumnCount, Min = 0)]
public uint MatrixColumnIndex;
// Zero-based row index, declared as a contiguous key type with
// _oneClassMatrixRowCount valid values starting at 0.
[KeyType(Contiguous = true, Count = _oneClassMatrixRowCount, Min = 0)]
public uint MatrixRowIndex;
// Matrix value at this position; ignored by the prediction engine, which
// writes its output to Score instead.
public float Value;
// Receives the model's prediction when the transformer is applied.
public float Score;
}

[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // This test is being fixed as part of issue #1441.
public void OneClassMatrixFactorizationInMemoryDataZeroBaseIndex()
{
// Build the observed entries of a 3-by-2 one-class matrix as a list of
// (column index, row index, value) tuples. In a one-class problem every
// unspecified position implicitly takes the constant chosen by the user;
// with that constant set to 0.15 below, the effective training matrix is:
// (0, 0, 1), (1, 1, 1), (0, 2, 1), (0, 1, 0.15), (1, 0, 0.15), (1, 2, 0.15).
// because matrix elements at (0, 1), (1, 0), and (1, 2) are not specified.
var trainingMatrix = new List<OneClassMatrixElementZeroBased>()
{
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 1 },
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 1, MatrixRowIndex = 1, Value = 1 },
new OneClassMatrixElementZeroBased() { MatrixColumnIndex = 0, MatrixRowIndex = 2, Value = 1 }
};

// Wrap the in-memory list as an IDataView so ML.NET components can consume it.
var trainingView = ComponentCreation.CreateDataView(Env, trainingMatrix);

// Build a matrix factorization trainer that reads "Value" as the training label,
// "MatrixColumnIndex" as the matrix's column index, and "MatrixRowIndex" as the
// matrix's row index.
var mlContext = new MLContext(seed: 1, conc: 1);
var estimator = new MatrixFactorizationTrainer(mlContext,
nameof(OneClassMatrixElementZeroBased.MatrixColumnIndex),
nameof(OneClassMatrixElementZeroBased.MatrixRowIndex),
nameof(OneClassMatrixElementZeroBased.Value),
advancedSettings: s =>
{
s.Fun = MatrixFactorizationTrainer.LibMFLossFunctionType.SquareLossOneClass;
s.NumIterations = 100;
s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
// Let's test non-default regularization coefficient.
s.Lambda = 0.025;
s.K = 16;
// Importance coefficient of loss function over matrix elements not specified in the input matrix.
s.Alpha = 0.01;
// Desired value for matrix elements not specified in the input matrix.
s.C = 0.15;
});

// Fit the factorization model on the training matrix.
var trainedModel = estimator.Fit(trainingView);

// Score the training set with the model we just trained.
var trainingPrediction = trainedModel.Transform(trainingView);

// Evaluate the predictions with regression metrics.
var metrics = mlContext.Regression.Evaluate(trainingPrediction, label: "Value", score: "Score");

// The fit should be tight; reject large squared error.
Assert.InRange(metrics.L2, 0, 0.0016);

// Build test data. The 2nd element was never specified during training, so its
// prediction should land near the constant set via s.C = 0.15. Compared with the
// training-time data structure, OneClassMatrixElementZeroBasedForScore carries one
// extra float for the prediction result; the prediction engine may ignore Value
// and write the predicted value into Score.
var testMatrix = new List<OneClassMatrixElementZeroBasedForScore>()
{
new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 0, MatrixRowIndex = 0, Value = 0, Score = 0 },
new OneClassMatrixElementZeroBasedForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 2, Value = 0, Score = 0 }
};

// Wrap the in-memory test list as an IDataView.
var testView = ComponentCreation.CreateDataView(Env, testMatrix);

// Score the test data with the trained model.
var testPrediction = trainedModel.Transform(testView);

var testResults = new List<OneClassMatrixElementZeroBasedForScore>(testPrediction.AsEnumerable<OneClassMatrixElementZeroBasedForScore>(mlContext, false));
// Positive example (i.e., examples can be found in dataMatrix) is close to 1.
CompareNumbersWithTolerance(0.982391, testResults[0].Score, digitsOfPrecision: 5);
// Negative example (i.e., examples can not be found in dataMatrix) is close to 0.15 (specified by s.C = 0.15 in the trainer).
CompareNumbersWithTolerance(0.141411, testResults[1].Score, digitsOfPrecision: 5);
}
}
}