Adding Factorization Machine and getting to build on Windows

sfilipi · sfilipi · commit 89462b71b9ad · 2018-06-19T14:55:51.000-07:00
diff --git a/src/Microsoft.ML.CpuMath/AssemblyInfo.cs b/src/Microsoft.ML.CpuMath/AssemblyInfo.cs
@@ -0,0 +1,9 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+[assembly: InternalsVisibleTo("Microsoft.ML.StandardLearners, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")]
diff --git a/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj b/src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj
@@ -11,4 +11,8 @@
     <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
   </ItemGroup>
 
+  <ItemGroup>
+    <Folder Include="Properties\" />
+  </ItemGroup>
+
 </Project>
diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachine.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachine.cs
diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineInterface.cs
@@ -0,0 +1,90 @@
+﻿using Microsoft.ML.Runtime.Internal.CpuMath;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using System.Runtime.InteropServices;
+
+using System.Security;
+
+namespace Microsoft.ML.Runtime.FactorizationMachine
+{
+    /// <summary>
+    /// Managed entry points for the native field-aware factorization machine kernels exported by the
+    /// FactorizationMachineNative library. The wrappers pin the managed buffers and hand raw pointers
+    /// to the native code.
+    /// </summary>
+    internal static unsafe class FieldAwareFactorizationMachineInterface
+    {
+        /// <summary>
+        /// Name of the native library. It is deliberately given without a file extension so that the
+        /// runtime applies the platform-specific prefix and suffix when probing; a hard-coded ".dll"
+        /// suffix does not resolve to the "lib*.so" / "lib*.dylib" binaries built on non-Windows platforms.
+        /// </summary>
+        internal const string NativePath = "FactorizationMachineNative";
+
+        /// <summary>Byte alignment expected for every <see cref="AlignedArray"/> passed to the native code.</summary>
+        public const int CbAlign = 16;
+
+        /// <summary>
+        /// Asserts that <paramref name="a"/> is non-null and has a positive size, and returns whether its
+        /// alignment equals <see cref="CbAlign"/>.
+        /// </summary>
+        private static bool Compat(AlignedArray a)
+        {
+            Contracts.AssertValue(a);
+            Contracts.Assert(a.Size > 0);
+            return a.CbAlign == CbAlign;
+        }
+
+        /// <summary>
+        /// Offsets the pinned pointer <paramref name="p"/> by the base reported by <paramref name="a"/> for
+        /// that address, and asserts that the resulting pointer is aligned to <see cref="CbAlign"/> bytes.
+        /// </summary>
+        private static float* Ptr(AlignedArray a, float* p)
+        {
+            Contracts.AssertValue(a);
+            float* q = p + a.GetBase((long)p);
+            Contracts.Assert(((long)q & (CbAlign - 1)) == 0);
+            return q;
+        }
+
+        /// <summary>
+        /// Native kernel: clears and fills <paramref name="latentSum"/> and writes the model output to
+        /// <paramref name="response"/>. Pointer arguments marked const are only read.
+        /// </summary>
+        [DllImport(NativePath), SuppressUnmanagedCodeSecurity]
+        public static extern void CalculateIntermediateVariablesNative(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices,
+            float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response);
+
+        /// <summary>
+        /// Native kernel: updates the linear and latent weights and their accumulated squared gradients
+        /// in place for one example. Pointer arguments marked const are only read.
+        /// </summary>
+        [DllImport(NativePath), SuppressUnmanagedCodeSecurity]
+        public static extern void CalculateGradientAndUpdateNative(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight,
+            int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope,
+            float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads);
+
+        /// <summary>
+        /// Pins the managed buffers and calls <see cref="CalculateIntermediateVariablesNative"/>.
+        /// </summary>
+        /// <param name="fieldCount">Number of fields.</param>
+        /// <param name="latentDim">Length of each latent vector.</param>
+        /// <param name="count">Number of leading entries of the index/value arrays that are processed.</param>
+        /// <param name="fieldIndices">Field index of each processed feature.</param>
+        /// <param name="featureIndices">Feature index of each processed feature.</param>
+        /// <param name="featureValues">Value of each processed feature.</param>
+        /// <param name="linearWeights">Linear weights, indexed by feature index.</param>
+        /// <param name="latentWeights">Aligned latent weights (read only by the native code).</param>
+        /// <param name="latentSum">Aligned scratch buffer that the native code clears and fills.</param>
+        /// <param name="response">Receives the computed response.</param>
+        public static void CalculateIntermediateVariables(int fieldCount, int latentDim, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues,
+            float[] linearWeights, AlignedArray latentWeights, AlignedArray latentSum, ref float response)
+        {
+            Contracts.AssertNonEmpty(fieldIndices);
+            Contracts.AssertNonEmpty(featureValues);
+            Contracts.AssertNonEmpty(featureIndices);
+            Contracts.AssertNonEmpty(linearWeights);
+            Contracts.Assert(Compat(latentWeights));
+            Contracts.Assert(Compat(latentSum));
+            // The native loop reads 'count' entries through raw pointers, so the pinned arrays must cover them.
+            Contracts.Assert(0 <= count && count <= fieldIndices.Length);
+            Contracts.Assert(count <= featureIndices.Length && count <= featureValues.Length);
+
+            fixed (int* pf = &fieldIndices[0])
+            fixed (int* pi = &featureIndices[0])
+            fixed (float* px = &featureValues[0])
+            fixed (float* pw = &linearWeights[0])
+            fixed (float* pv = &latentWeights.Items[0])
+            fixed (float* pq = &latentSum.Items[0])
+            fixed (float* pr = &response)
+                CalculateIntermediateVariablesNative(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr);
+        }
+
+        /// <summary>
+        /// Pins the managed buffers and calls <see cref="CalculateGradientAndUpdateNative"/>, which updates
+        /// <paramref name="linearWeights"/>, <paramref name="latentWeights"/> and both accumulated squared
+        /// gradient buffers in place.
+        /// </summary>
+        /// <param name="lambdaLinear">Regularization coefficient applied to the linear weights.</param>
+        /// <param name="lambdaLatent">Regularization coefficient applied to the latent weights.</param>
+        /// <param name="learningRate">Step size of the update.</param>
+        /// <param name="fieldCount">Number of fields.</param>
+        /// <param name="latentDim">Length of each latent vector.</param>
+        /// <param name="weight">Multiplier applied to every gradient.</param>
+        /// <param name="count">Number of leading entries of the index/value arrays that are processed.</param>
+        /// <param name="fieldIndices">Field index of each processed feature.</param>
+        /// <param name="featureIndices">Feature index of each processed feature.</param>
+        /// <param name="featureValues">Value of each processed feature.</param>
+        /// <param name="latentSum">Aligned buffer of latent sums (read only by the native code).</param>
+        /// <param name="slope">Scalar multiplied into the feature-dependent part of each gradient.</param>
+        /// <param name="linearWeights">Linear weights, updated in place.</param>
+        /// <param name="latentWeights">Aligned latent weights, updated in place.</param>
+        /// <param name="linearAccumulatedSquaredGrads">Accumulated squared gradients of the linear weights, updated in place.</param>
+        /// <param name="latentAccumulatedSquaredGrads">Aligned accumulated squared gradients of the latent weights, updated in place.</param>
+        public static void CalculateGradientAndUpdate(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim,
+            float weight, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues, AlignedArray latentSum, float slope,
+            float[] linearWeights, AlignedArray latentWeights, float[] linearAccumulatedSquaredGrads, AlignedArray latentAccumulatedSquaredGrads)
+        {
+            Contracts.AssertNonEmpty(fieldIndices);
+            Contracts.AssertNonEmpty(featureIndices);
+            Contracts.AssertNonEmpty(featureValues);
+            Contracts.Assert(Compat(latentSum));
+            Contracts.AssertNonEmpty(linearWeights);
+            Contracts.Assert(Compat(latentWeights));
+            Contracts.AssertNonEmpty(linearAccumulatedSquaredGrads);
+            Contracts.Assert(Compat(latentAccumulatedSquaredGrads));
+            // The native loop reads 'count' entries through raw pointers, so the pinned arrays must cover them.
+            Contracts.Assert(0 <= count && count <= fieldIndices.Length);
+            Contracts.Assert(count <= featureIndices.Length && count <= featureValues.Length);
+
+            fixed (int* pf = &fieldIndices[0])
+            fixed (int* pi = &featureIndices[0])
+            fixed (float* px = &featureValues[0])
+            fixed (float* pq = &latentSum.Items[0])
+            fixed (float* pw = &linearWeights[0])
+            fixed (float* pv = &latentWeights.Items[0])
+            fixed (float* phw = &linearAccumulatedSquaredGrads[0])
+            fixed (float* phv = &latentAccumulatedSquaredGrads.Items[0])
+                CalculateGradientAndUpdateNative(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px,
+                    Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv));
+        }
+    }
+}
diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
@@ -3,6 +3,7 @@
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>
     <IncludeInPackage>Microsoft.ML</IncludeInPackage>
+	<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt
@@ -179,4 +179,5 @@ function(install_library_and_symbols targetName)
 endfunction()
 
 add_subdirectory(CpuMathNative)
-add_subdirectory(FastTreeNative)
+add_subdirectory(FastTreeNative)
+add_subdirectory(FactorizationMachineNative)
diff --git a/src/Native/FactorizationMachineNative/CMakeLists.txt b/src/Native/FactorizationMachineNative/CMakeLists.txt
@@ -0,0 +1,16 @@
+﻿cmake_minimum_required (VERSION 3.2)
+project (FactorizationMachineNative)
+
+set(SOURCES
+    FactorizationMachineCore.cpp
+)
+
+if(NOT WIN32)
+    # Apply the SSE flag to this project's only source; it was previously attached to segment.cpp, which is not built here.
+    set_property(SOURCE FactorizationMachineCore.cpp APPEND_STRING PROPERTY COMPILE_FLAGS " -msse4.1")
+    list(APPEND SOURCES ${VERSION_FILE_PATH})
+endif()
+
+add_library(FactorizationMachineNative SHARED ${SOURCES} ${RESOURCES})
+
+install_library_and_symbols (FactorizationMachineNative)
diff --git a/src/Native/FactorizationMachineNative/FactorizationMachineCore.cpp b/src/Native/FactorizationMachineNative/FactorizationMachineCore.cpp
@@ -0,0 +1,163 @@
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <pmmintrin.h>
+
+#define UNUSED(x) (void)(x)
+#define DEBUG_ONLY(x) (void)(x)
+
+#ifdef COMPILER_GCC
+
+#include "UnixSal.h"
+#define EXPORT_API(ret) extern "C" __attribute__((visibility("default"))) ret
+
+#else
+#include <intrin.h>
+#define EXPORT_API(ret) extern "C" __declspec(dllexport) ret __stdcall
+#endif
+
+// Computes the model response for one example and fills the latentSum scratch buffer.
+//   fieldCount (m), latentDim (d): dimensions used for all index arithmetic below.
+//   count (c): number of entries read from fieldIndices, featureIndices and featureValues.
+//   linearWeights: indexed by feature index j.
+//   latentWeights: read at offset j * m * d + f * d for feature j and field f.
+//   latentSum: the first m * m * d floats are zeroed, then accumulated at offset f * m * d + f' * d.
+//   response: receives linearResponse + latentResponse.
+// All latent buffers are accessed with aligned SSE loads/stores, so the addresses formed below
+// must be 16-byte aligned. Every k-loop advances 4 floats at a time and stops when fewer than
+// 4 remain, so a trailing d % 4 elements are not processed.
+// NOTE(review): presumably callers always pass latentDim as a multiple of 4 - confirm.
+EXPORT_API(void) CalculateIntermediateVariablesNative(int fieldCount, int latentDim, int count, _In_ int * fieldIndices, _In_ int * featureIndices, _In_ float * featureValues,
+    _In_ float * linearWeights, _In_ float * latentWeights, _Inout_ float * latentSum, _Out_ float * response)
+{
+    const int m = fieldCount;
+    const int d = latentDim;
+    const int c = count;
+    const int * pf = fieldIndices;
+    const int * pi = featureIndices;
+    const float * px = featureValues;
+    const float * pw = linearWeights;
+    const float * pv = latentWeights;
+    float * pq = latentSum;
+    float linearResponse = 0;
+    float latentResponse = 0;
+
+    // Reset the accumulator buffer; _y and _tmp hold four partial sums each.
+    memset(pq, 0, sizeof(float) * m * m * d);
+    __m128 _y = _mm_setzero_ps();
+    __m128 _tmp = _mm_setzero_ps();
+
+    for (int i = 0; i < c; i++)
+    {
+        const int f = pf[i];
+        const int j = pi[i];
+        linearResponse += pw[j] * px[i];
+
+        // Broadcast x_i to all four lanes.
+        const __m128 _x = _mm_load1_ps(px + i);
+        const __m128 _xx = _mm_mul_ps(_x, _x);
+
+        // tmp -= <v_j,f, v_j,f> * x * x
+        const int vBias = j * m * d + f * d;
+        const float * vjf = pv + vBias;
+        for (int k = 0; k + 4 <= d; k += 4)
+        {
+            const __m128 _v = _mm_load_ps(vjf + k);
+            _tmp = _mm_sub_ps(_tmp, _mm_mul_ps(_mm_mul_ps(_v, _v), _xx));
+        }
+
+        for (int fprime = 0; fprime < m; fprime++)
+        {
+            // This vBias shadows the outer one: it addresses v_j,f' rather than v_j,f.
+            const int vBias = j * m * d + fprime * d;
+            const int qBias = f * m * d + fprime * d;
+            const float * vjfprime = pv + vBias;
+            float * qffprime = pq + qBias;
+
+            // q_f,f' += v_j,f' * x
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                const __m128 _v = _mm_load_ps(vjfprime + k);
+                __m128 _q = _mm_load_ps(qffprime + k);
+                _q = _mm_add_ps(_q, _mm_mul_ps(_v, _x));
+                _mm_store_ps(qffprime + k, _q);
+            }
+        }
+    }
+
+    for (int f = 0; f < m; f++)
+    {
+        // tmp += <q_f,f, q_f,f>
+        const float * qff = pq + f * m * d + f * d;
+        for (int k = 0; k + 4 <= d; k += 4)
+        {
+            __m128 _qff = _mm_load_ps(qff + k);
+            _tmp = _mm_add_ps(_tmp, _mm_mul_ps(_qff, _qff));
+        }
+
+        // y += <q_f,f', q_f',f>, f != f'
+        // Only pairs with f' > f are visited, so each unordered pair is counted once.
+        for (int fprime = f + 1; fprime < m; fprime++)
+        {
+            const float * qffprime = pq + f * m * d + fprime * d;
+            const float * qfprimef = pq + fprime * m * d + f * d;
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                __m128 _qffprime = _mm_load_ps(qffprime + k);
+                __m128 _qfprimef = _mm_load_ps(qfprimef + k);
+                _y = _mm_add_ps(_y, _mm_mul_ps(_qffprime, _qfprimef));
+            }
+        }
+    }
+
+    // Fold in half of the same-field term, then reduce the four lanes to one value:
+    // movehl adds lanes 2,3 onto lanes 0,1; the shuffle then adds lane 1 onto lane 0.
+    _y = _mm_add_ps(_y, _mm_mul_ps(_mm_set_ps1(0.5f), _tmp));
+    _tmp = _mm_add_ps(_y, _mm_movehl_ps(_y, _y));
+    _y = _mm_add_ps(_tmp, _mm_shuffle_ps(_tmp, _tmp, 1)); // the lowest slot is the response value
+    _mm_store_ss(&latentResponse, _y);
+    *response = linearResponse + latentResponse;
+}
+
+// Applies one gradient step for a single example, updating the weights and their accumulated
+// squared gradients in place.
+//   lambdaLinear / lambdaLatent: coefficients multiplied with the current linear / latent weight.
+//   learningRate: step size; each step is scaled by 1 / sqrt(accumulated squared gradient).
+//   fieldCount (m), latentDim (d): dimensions used for all index arithmetic below.
+//   weight: multiplier applied to every gradient.
+//   count: number of entries read from fieldIndices, featureIndices and featureValues.
+//   latentSum: read-only, addressed at offset f' * m * d + f * d.
+//   slope: scalar multiplied into the feature-dependent part of each gradient.
+//   linearWeights / linearAccumulatedSquaredGrads: indexed by feature index j.
+//   latentWeights / latentAccumulatedSquaredGrads: addressed at offset j * m * d + f' * d.
+// Latent buffers are accessed with aligned SSE loads/stores, 4 floats per step; a trailing
+// d % 4 elements are not processed.
+// NOTE(review): presumably callers always pass latentDim as a multiple of 4 - confirm.
+EXPORT_API(void) CalculateGradientAndUpdateNative(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, int count,
+    _In_ int* /*const*/ fieldIndices, _In_ int* /*const*/ featureIndices, _In_ float* /*const*/ featureValues, _In_ float* /*const*/ latentSum, float slope,
+    _Inout_ float* linearWeights, _Inout_ float* latentWeights, _Inout_ float* linearAccumulatedSquaredGrads, _Inout_ float* latentAccumulatedSquaredGrads)
+{
+    const int m = fieldCount;
+    const int d = latentDim;
+    const int c = count; // not referenced below; the loop reads count directly
+    const int * pf = fieldIndices;
+    const int * pi = featureIndices;
+    const float * px = featureValues;
+    const float * pq = latentSum;
+    float * pw = linearWeights;
+    float * pv = latentWeights;
+    float * phw = linearAccumulatedSquaredGrads;
+    float * phv = latentAccumulatedSquaredGrads;
+
+    // Broadcast the scalar hyper-parameters to all four lanes once.
+    const __m128 _wei = _mm_set_ps1(weight);
+    const __m128 _s = _mm_set_ps1(slope);
+    const __m128 _lr = _mm_set_ps1(learningRate);
+    const __m128 _lambdav = _mm_set_ps1(lambdaLatent);
+
+    for (int i = 0; i < count; i++)
+    {
+        const int f = pf[i];
+        const int j = pi[i];
+
+        // update linear term w_j
+        // g = weight * (lambdaLinear * w_j + slope * x_i); the squared gradient is accumulated before
+        // the step so the divisor already includes the current g.
+        float g = weight * (lambdaLinear * pw[j] + slope * px[i]);
+        phw[j] += g * g;
+        pw[j] -= learningRate / sqrt(phw[j]) * g;
+
+        // update latent term, v_j,f', f'=1,...,m
+        const __m128 _x = _mm_load1_ps(px + i);
+        for (int fprime = 0; fprime < m; fprime++)
+        {
+            float * vjfprime = pv + j * m * d + fprime * d;
+            float * hvjfprime = phv + j * m * d + fprime * d;
+            const float * qfprimef = pq + fprime * m * d + f * d;
+            const __m128 _sx = _mm_mul_ps(_s, _x);
+
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                __m128 _v = _mm_load_ps(vjfprime + k);
+                __m128 _q = _mm_load_ps(qfprimef + k);
+                __m128 _g = _mm_mul_ps(_lambdav, _v);
+                // For f' == f the feature's own contribution v * x is subtracted from q first.
+                if (fprime != f)
+                    _g = _mm_add_ps(_g, _mm_mul_ps(_sx, _q));
+                else
+                    _g = _mm_add_ps(_g, _mm_mul_ps(_sx, _mm_sub_ps(_q, _mm_mul_ps(_v, _x))));
+                _g = _mm_mul_ps(_wei, _g);
+
+                // h += g * g, then v -= learningRate * rsqrt(h) * g. _mm_rsqrt_ps is the
+                // approximate reciprocal square root, unlike the exact sqrt used for the linear term.
+                const __m128 _h = _mm_add_ps(_mm_load_ps(hvjfprime + k), _mm_mul_ps(_g, _g));
+                _v = _mm_sub_ps(_v, _mm_mul_ps(_lr, _mm_mul_ps(_mm_rsqrt_ps(_h), _g)));
+                _mm_store_ps(vjfprime + k, _v);
+                _mm_store_ps(hvjfprime + k, _h);
+            }
+        }
+    }
+}
diff --git a/src/Native/build.proj b/src/Native/build.proj
@@ -71,9 +71,10 @@
     <ItemGroup>
       <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)CpuMathNative$(NativeLibExtension)"
                           RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
-
       <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)FastTreeNative$(NativeLibExtension)"
                           RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
+      <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)FactorizationMachineNative$(NativeLibExtension)"
+                          RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
     </ItemGroup>
 
     <ItemGroup>