diff --git a/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs b/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs
index b062898aec..deed03ba3e 100644
--- a/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs
+++ b/src/Microsoft.ML.CpuMath/CpuMathUtils.netstandard.cs
@@ -19,11 +19,17 @@ internal static partial class CpuMathUtils
         public static int GetVectorAlignment()
             => Vector128Alignment;
 
+        /// <summary>
+        /// Check if <paramref name="a"/>'s alignment is suitable for SSE instructions. Returns <see langword="true"/>
+        /// if <c>a.CbAlign</c> is a multiple of <see cref="Vector128Alignment"/> and <see langword="false"/> otherwise.
+        /// </summary>
+        /// <param name="a">The vector being checked; asserted to be non-null and to have a positive size.</param>
+        /// <returns>Whether <paramref name="a"/>'s alignment is compatible with <see cref="Vector128Alignment"/>.</returns>
         private static bool Compat(AlignedArray a)
         {
             Contracts.AssertValue(a);
             Contracts.Assert(a.Size > 0);
-            return a.CbAlign == Vector128Alignment;
+            return a.CbAlign % Vector128Alignment == 0;
         }
 
         private static unsafe float* Ptr(AlignedArray a, float* p)
@@ -34,6 +40,19 @@ private static bool Compat(AlignedArray a)
             return q;
         }
 
+        /// <summary>
+        /// Compute the product of matrix <paramref name="mat"/> (the matrix is flattened because its type is <see cref="AlignedArray"/> instead of a matrix)
+        /// and a vector <paramref name="src"/>.
+        /// </summary>
+        /// <param name="tran">Whether to transpose <paramref name="mat"/> before doing any computation.</param>
+        /// <param name="mat">If <paramref name="tran"/> is <see langword="false"/>, <paramref name="mat"/> is an m-by-n matrix, and the value at the i-th row and the j-th column is indexed by i * n + j in <paramref name="mat"/>.
+        /// If <paramref name="tran"/> is <see langword="true"/>, <paramref name="mat"/> would be viewed as an n-by-m matrix, and the value at the i-th row and the j-th column in the transposed matrix is indexed by j * m + i in the
+        /// original <paramref name="mat"/>.</param>
+        /// <param name="src">An n-by-1 matrix, which is also a vector.</param>
+        /// <param name="dst">An m-by-1 matrix, which is also a vector.</param>
+        /// <param name="crun">The truncation level of <paramref name="dst"/>. For example, if <paramref name="crun"/> is 2, <paramref name="dst"/>
+        /// will be considered as a 2-by-1 matrix and therefore elements after its 2nd element will be ignored. If no truncation should happen,
+        /// set <paramref name="crun"/> to the length of <paramref name="dst"/>.</param>
         public static void MatrixTimesSource(bool tran, AlignedArray mat, AlignedArray src, AlignedArray dst, int crun)
         {
             Contracts.Assert(Compat(mat));
diff --git a/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs b/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs
index 1d0ef92696..8c5ecaaac4 100644
--- a/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs
+++ b/src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs
@@ -11,5 +11,6 @@
 [assembly: InternalsVisibleTo(assemblyName: "LibSvmWrapper" + InternalPublicKey.Value)]
 [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Runtime.NeuralNetworks" + InternalPublicKey.Value)]
 [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.RServerScoring.NeuralNetworks" + InternalPublicKey.Value)]
+[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)]
 [assembly: InternalsVisibleTo(assemblyName: "RunTests" + InternalPublicKey.Value)]
 [assembly: InternalsVisibleTo(assemblyName: "SseTests" + InternalPublicKey.Value)]
diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
index adc608cffd..df32acfd36 100644
--- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
+++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
@@ -26,6 +26,7 @@
     <ProjectReference Include="..\..\src\Microsoft.ML.TensorFlow.StaticPipe\Microsoft.ML.TensorFlow.StaticPipe.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.ML.TensorFlow\Microsoft.ML.TensorFlow.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.ML.TimeSeries\Microsoft.ML.TimeSeries.csproj" />
+    <ProjectReference Include="..\..\src\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Predictor.Tests\Microsoft.ML.Predictor.Tests.csproj" />
     <ProjectReference Include="..\Microsoft.ML.TestFramework\Microsoft.ML.TestFramework.csproj" />
   </ItemGroup>
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
index 23289dcd48..c1d7ad8370 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
@@ -8,6 +8,7 @@
 using System.Linq;
 using System.Runtime.InteropServices;
 using Microsoft.ML.Data;
+using Microsoft.ML.Internal.CpuMath;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework.Attributes;
 using Microsoft.ML.Trainers;
@@ -253,6 +254,29 @@ public void MatrixFactorizationInMemoryData()
                 Assert.True(pred.Score != 0);
         }
 
+        internal class MatrixElementZeroBased256By256
+        {
+            // Matrix column index starts from 0; _matrixColumnCount is passed to KeyType as the key count.
+            [KeyType(_matrixColumnCount)]
+            public uint MatrixColumnIndex;
+            // Matrix row index starts from 0; _matrixRowCount is passed to KeyType as the key count.
+            [KeyType(_matrixRowCount)]
+            public uint MatrixRowIndex;
+            // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row in the considered matrix.
+            public float Value;
+        }
+
+        internal class MatrixElementZeroBasedForScore256By256
+        {
+            // Matrix column index starts from 0; _matrixColumnCount is passed to KeyType as the key count.
+            [KeyType(_matrixColumnCount)]
+            public uint MatrixColumnIndex;
+            // Matrix row index starts from 0; _matrixRowCount is passed to KeyType as the key count.
+            [KeyType(_matrixRowCount)]
+            public uint MatrixRowIndex;
+            public float Score; // Score for the (MatrixColumnIndex, MatrixRowIndex) entry.
+        }
+
         internal class MatrixElementZeroBased
         {
             // Matrix column index starts from 0 and is at most _synthesizedMatrixColumnCount.
@@ -268,11 +292,9 @@ internal class MatrixElementZeroBased
         internal class MatrixElementZeroBasedForScore
         {
             // Matrix column index starts from 0 and is at most _synthesizedMatrixColumnCount.
-            // Contieuous=true means that all values from 0 to _synthesizedMatrixColumnCount are allowed keys.
             [KeyType(_synthesizedMatrixColumnCount)]
             public uint MatrixColumnIndex;
             // Matrix row index starts from 0 and is at most _synthesizedMatrixRowCount.
-            // Contieuous=true means that all values from 0 to _synthesizedMatrixRowCount are allowed keys.
             [KeyType(_synthesizedMatrixRowCount)]
             public uint MatrixRowIndex;
             public float Score;
@@ -603,5 +625,97 @@ public void OneClassMatrixFactorizationWithUnseenColumnAndRow()
             CompareNumbersWithTolerance(0.05511549, testResults[1].Score, digitsOfPrecision: 5);
             CompareNumbersWithTolerance(0.00316973357, testResults[2].Score, digitsOfPrecision: 5);
         }
+
+        const int _matrixColumnCount = 256; // Number of columns in the matrix built by InspectMatrixFactorizationModel.
+        const int _matrixRowCount = 256; // Number of rows in the matrix built by InspectMatrixFactorizationModel.
+
+        /// <summary>
+        /// Trains matrix factorization on a 256-by-256 matrix and checks that the 2nd column's predicted scores match leftFactorMatrix times the 2nd row of rightFactorMatrix.
+        /// </summary>
+        [MatrixFactorizationFact]
+        public void InspectMatrixFactorizationModel()
+        {
+            // Create an in-memory matrix as a list of (column index, row index, value) entries; i and j iterate over columns and rows, respectively.
+            var dataMatrix = new List<MatrixElementZeroBased256By256>();
+            for (uint i = 0; i < _matrixColumnCount; ++i)
+                for (uint j = 0; j < _matrixRowCount; ++j)
+                    dataMatrix.Add(new MatrixElementZeroBased256By256() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });
+
+            // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
+            var dataView = ML.Data.LoadFromEnumerable(dataMatrix);
+
+            // Configure a matrix factorization trainer that uses "Value" as the label, "MatrixColumnIndex" as the column index, and "MatrixRowIndex" as the row index.
+            var mlContext = new MLContext(seed: 1);
+
+            var options = new MatrixFactorizationTrainer.Options
+            {
+                MatrixColumnIndexColumnName = nameof(MatrixElementZeroBased256By256.MatrixColumnIndex),
+                MatrixRowIndexColumnName = nameof(MatrixElementZeroBased256By256.MatrixRowIndex),
+                LabelColumnName = nameof(MatrixElementZeroBased256By256.Value),
+                NumberOfIterations = 100,
+                NumberOfThreads = 1, // To eliminate randomness, # of threads must be 1.
+                ApproximationRank = 64,
+                LearningRate = 0.5,
+            };
+
+            var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options);
+
+            // Train a matrix factorization model.
+            var model = pipeline.Fit(dataView);
+
+            // Check that the trained model exposes the expected column names and key types.
+            Assert.True(model.MatrixColumnIndexColumnName == nameof(MatrixElementZeroBased256By256.MatrixColumnIndex));
+            Assert.True(model.MatrixRowIndexColumnName == nameof(MatrixElementZeroBased256By256.MatrixRowIndex));
+            var matColKeyType = model.MatrixColumnIndexColumnType as KeyDataViewType;
+            Assert.NotNull(matColKeyType);
+            var matRowKeyType = model.MatrixRowIndexColumnType as KeyDataViewType;
+            Assert.NotNull(matRowKeyType);
+            Assert.True(matColKeyType.Count == _matrixColumnCount);
+            Assert.True(matRowKeyType.Count == _matrixRowCount);
+
+            // Create a test set with placeholder scores. It stands for the 2nd column of the training matrix.
+            var testMatrix = new List<MatrixElementZeroBasedForScore256By256>();
+            for (/* column index */ uint i = 1; i < 2; ++i)
+                for (/* row index */ uint j = 0; j < _matrixRowCount; ++j)
+                    testMatrix.Add(new MatrixElementZeroBasedForScore256By256() { MatrixColumnIndex = i, MatrixRowIndex = j, Score = 0 });
+
+            // Load test set as IDataView.
+            var testData = ML.Data.LoadFromEnumerable(testMatrix);
+
+            // Apply the trained model to the test set.
+            var transformedTestData = model.Transform(testData);
+
+            // Load back predictions on the 2nd column as a list of MatrixElementZeroBasedForScore256By256.
+            var predictions = mlContext.Data.CreateEnumerable<MatrixElementZeroBasedForScore256By256>(transformedTestData, false).ToList();
+
+            // Inspect the trained model.
+            int m = model.Model.NumberOfRows;
+            int n = model.Model.NumberOfColumns;
+            int k = model.Model.ApproximationRank;
+
+            // The training matrix is approximated by leftFactorMatrix * rightFactorMatrix^T, where "^T" means matrix transpose.
+            // Thus, to compute the approximation of the 2nd column, we only need the whole leftFactorMatrix and the 2nd row in rightFactorMatrix.
+
+            // First copy the trained left factor matrix to an aligned array for applying SSE code.
+            var leftFactorMatrix = model.Model.LeftFactorMatrix;
+            var leftFactorMatrixAligned = new AlignedArray(m * k, 64);
+            for (int i = 0; i < leftFactorMatrix.Count; ++i)
+                leftFactorMatrixAligned[i] = leftFactorMatrix[i];
+
+            // Second copy the trained right factor row to a k-by-1 aligned vector for applying SSE code.
+            var rightFactorVectorAligned = new AlignedArray(k, 64);
+            for (int i = 0; i < k; ++i)
+                rightFactorVectorAligned[i] = model.Model.RightFactorMatrix[1 * k + i]; // value at the i-th row and j-th column is indexed by i * k + j.
+
+            // Prepare a buffer to store the matrix-vector product of leftFactorMatrix and the 2nd row of rightFactorMatrix.
+            var valuesAtSecondColumn = new AlignedArray(m, 64);
+
+            // Compute leftFactorMatrixAligned (m-by-k) * rightFactorVectorAligned (k-by-1).
+            CpuMathUtils.MatrixTimesSource(false, leftFactorMatrixAligned, rightFactorVectorAligned, valuesAtSecondColumn, m);
+
+            // Check if results computed by SSE code and MF predictor are the same (compared to 3 decimal places).
+            for (int i = 0; i < predictions.Count; ++i)
+                Assert.Equal(predictions[i].Score, valuesAtSecondColumn[i], 3);
+        }
     }
 }
\ No newline at end of file