Skip to content

Commit 48dffb0

Browse files
authored
Internalize and cleanup recommender project (#2451)
1 parent a254bf5 commit 48dffb0

File tree

10 files changed

+408
-193
lines changed

10 files changed

+408
-193
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML.Data;
4+
using static Microsoft.ML.SamplesUtils.DatasetUtils;
5+
6+
namespace Microsoft.ML.Samples.Dynamic
7+
{
8+
public partial class MatrixFactorizationExample
9+
{
10+
// This example first creates in-memory data and then uses it to train a matrix factorization model with default parameters. Afterward, quality metrics are reported.
11+
public static void MatrixFactorization()
12+
{
13+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
14+
// as a catalog of available operations and as the source of randomness.
15+
var mlContext = new MLContext(seed: 0, conc: 1);
16+
17+
// Get a small in-memory dataset.
18+
var data = GetRecommendationData();
19+
20+
// Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
21+
var dataView = mlContext.Data.ReadFromEnumerable(data);
22+
23+
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
24+
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
25+
// names from the MatrixElement class.
26+
var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(nameof(MatrixElement.MatrixColumnIndex),
27+
nameof(MatrixElement.MatrixRowIndex), nameof(MatrixElement.Value), 10, 0.2, 10);
28+
29+
// Train a matrix factorization model.
30+
var model = pipeline.Fit(dataView);
31+
32+
// Apply the trained model to the training set.
33+
var prediction = model.Transform(dataView);
34+
35+
// Calculate regression metrics for the prediction result.
36+
var metrics = mlContext.Recommendation().Evaluate(prediction,
37+
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));
38+
39+
// Print out some metrics for checking the model's quality.
40+
Console.WriteLine($"L1 - {metrics.L1}"); // 0.17208
41+
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04766
42+
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04766
43+
Console.WriteLine($"RMS - {metrics.Rms}"); //0.21831
44+
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97616
45+
46+
// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
47+
// (here we use Score=0 and it will be overwritten by the true prediction). If either the row or the column index is out of range
48+
// (e.g., MatrixColumnIndex=99999), the prediction value will be NaN.
49+
var testMatrix = new List<MatrixElementForScore>() {
50+
new MatrixElementForScore() { MatrixColumnIndex = 1, MatrixRowIndex = 7, Score = 0 },
51+
new MatrixElementForScore() { MatrixColumnIndex = 3, MatrixRowIndex = 6, Score = 0 } };
52+
53+
// Again, convert the test data to a format supported by ML.NET.
54+
var testDataView = mlContext.Data.ReadFromEnumerable(testMatrix);
55+
// Feed the test data into the model and then iterate through all predictions.
56+
foreach (var pred in mlContext.CreateEnumerable<MatrixElementForScore>(model.Transform(testDataView), false))
57+
Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is {pred.Score}");
58+
// Predicted value at row 7 and column 1 is 2.876928
59+
// Predicted value at row 6 and column 3 is 3.587935
60+
}
61+
}
62+
}

docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs renamed to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs

+18-51
Original file line numberDiff line numberDiff line change
@@ -2,73 +2,38 @@
22
using System.Collections.Generic;
33
using Microsoft.ML.Data;
44
using Microsoft.ML.Trainers;
5+
using static Microsoft.ML.SamplesUtils.DatasetUtils;
56

67
namespace Microsoft.ML.Samples.Dynamic
78
{
8-
public class MatrixFactorizationExample
9+
public partial class MatrixFactorizationExample
910
{
10-
// The following variables defines the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount.
11-
// Because in ML.NET key type's minimal value is zero, the first row index is always zero in C# data structure (e.g., MatrixColumnIndex=0
12-
// and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If user's row index
13-
// starts with 1, their row index 1 would be mapped to the 2nd row in matrix factorization module and their first row may contain no values.
14-
// This behavior is also true to column index.
15-
const int _synthesizedMatrixFirstColumnIndex = 1;
16-
const int _synthesizedMatrixFirstRowIndex = 1;
17-
const int _synthesizedMatrixColumnCount = 60;
18-
const int _synthesizedMatrixRowCount = 100;
19-
20-
// A data structure used to encode a single value in matrix
21-
internal class MatrixElement
22-
{
23-
// Matrix column index is at most _synthesizedMatrixColumnCount + _synthesizedMatrixFirstColumnIndex.
24-
[KeyType(Count = _synthesizedMatrixColumnCount + _synthesizedMatrixFirstColumnIndex)]
25-
public uint MatrixColumnIndex;
26-
// Matrix row index is at most _synthesizedMatrixRowCount + _synthesizedMatrixFirstRowIndex.
27-
[KeyType(Count = _synthesizedMatrixRowCount + _synthesizedMatrixFirstRowIndex)]
28-
public uint MatrixRowIndex;
29-
// The value at the column MatrixColumnIndex and row MatrixRowIndex.
30-
public float Value;
31-
}
32-
33-
// A data structure used to encode prediction result. Comparing with MatrixElement, The field Value in MatrixElement is
34-
// renamed to Score because Score is the default name of matrix factorization's output.
35-
internal class MatrixElementForScore
36-
{
37-
[KeyType(Count = _synthesizedMatrixColumnCount + _synthesizedMatrixFirstColumnIndex)]
38-
public uint MatrixColumnIndex;
39-
[KeyType(Count = _synthesizedMatrixRowCount + _synthesizedMatrixFirstRowIndex)]
40-
public uint MatrixRowIndex;
41-
public float Score;
42-
}
4311

4412
// This example first creates in-memory data and then uses it to train a matrix factorization model. Afterward, quality metrics are reported.
45-
public static void MatrixFactorizationInMemoryData()
13+
public static void MatrixFactorizationWithOptions()
4614
{
47-
// Create an in-memory matrix as a list of tuples (column index, row index, value).
48-
var dataMatrix = new List<MatrixElement>();
49-
for (uint i = _synthesizedMatrixFirstColumnIndex; i < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++i)
50-
for (uint j = _synthesizedMatrixFirstRowIndex; j < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++j)
51-
dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });
52-
5315
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
5416
// as a catalog of available operations and as the source of randomness.
5517
var mlContext = new MLContext(seed: 0, conc: 1);
5618

19+
// Get a small in-memory dataset.
20+
var data = GetRecommendationData();
21+
5722
// Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
58-
var dataView = mlContext.Data.ReadFromEnumerable(dataMatrix);
23+
var dataView = mlContext.Data.ReadFromEnumerable(data);
5924

6025
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
6126
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
6227
// names from the MatrixElement class.
63-
6428
var options = new MatrixFactorizationTrainer.Options
6529
{
6630
MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex),
6731
MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex),
6832
LabelColumnName = nameof(MatrixElement.Value),
6933
NumIterations = 10,
7034
NumThreads = 1,
71-
K = 32,
35+
ApproximationRank = 32,
36+
LearningRate = 0.3
7237
};
7338

7439
var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options);
@@ -84,11 +49,11 @@ public static void MatrixFactorizationInMemoryData()
8449
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));
8550

8651
// Print out some metrics for checking the model's quality.
87-
Console.WriteLine($"L1 - {metrics.L1}");
88-
Console.WriteLine($"L2 - {metrics.L2}");
89-
Console.WriteLine($"LossFunction - {metrics.LossFn}");
90-
Console.WriteLine($"RMS - {metrics.Rms}");
91-
Console.WriteLine($"RSquared - {metrics.RSquared}");
52+
Console.WriteLine($"L1 - {metrics.L1}"); // 0.16375
53+
Console.WriteLine($"L2 - {metrics.L2}"); // 0.04407
54+
Console.WriteLine($"LossFunction - {metrics.LossFn}"); // 0.04407
55+
Console.WriteLine($"RMS - {metrics.Rms}"); // 0.2099
56+
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.97797
9257

9358
// Create two entries for making predictions. Of course, the prediction value, Score, is unknown so it can be anything
9459
// (here we use Score=0 and it will be overwritten by the true prediction). If either the row or the column index is out of range
@@ -101,8 +66,10 @@ public static void MatrixFactorizationInMemoryData()
10166
var testDataView = mlContext.Data.ReadFromEnumerable(testMatrix);
10267

10368
// Feed the test data into the model and then iterate through all predictions.
104-
foreach (var pred in mlContext.CreateEnumerable<MatrixElementForScore>(testDataView, false))
105-
Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex} and column {pred.MatrixColumnIndex} is {pred.Score}");
69+
foreach (var pred in mlContext.CreateEnumerable<MatrixElementForScore>(model.Transform(testDataView), false))
70+
Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex-1} and column {pred.MatrixColumnIndex-1} is {pred.Score}");
71+
// Predicted value at row 7 and column 1 is 2.828761
72+
// Predicted value at row 6 and column 3 is 3.642226
10673
}
10774
}
10875
}

0 commit comments

Comments
 (0)