Commit 89462b7 (parent: d47cd0c)

Adding Factorization Machine and getting to build on Windows
9 files changed, 1000 insertions(+), 2 deletions(-)
src/Microsoft.ML.CpuMath/Properties/AssemblyInfo.cs

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+[assembly: InternalsVisibleTo("Microsoft.ML.StandardLearners, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")]
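
(This InternalsVisibleTo grant is what lets the new factorization-machine code in Microsoft.ML.StandardLearners use CpuMath internals such as AlignedArray, seen in the interop wrapper below.)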

src/Microsoft.ML.CpuMath/Microsoft.ML.CpuMath.csproj

Lines changed: 4 additions & 0 deletions
@@ -11,4 +11,8 @@
     <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
   </ItemGroup>
 
+  <ItemGroup>
+    <Folder Include="Properties\" />
+  </ItemGroup>
+
 </Project>

src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachine.cs

Lines changed: 713 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+using Microsoft.ML.Runtime.Internal.CpuMath;
+using Microsoft.ML.Runtime.Internal.Utilities;
+using System.Runtime.InteropServices;
+
+using System.Security;
+
+namespace Microsoft.ML.Runtime.FactorizationMachine
+{
+    internal unsafe static class FieldAwareFactorizationMachineInterface
+    {
+        internal const string NativePath = "FactorizationMachineNative.dll";
+        public const int CbAlign = 16;
+
+        private static bool Compat(AlignedArray a)
+        {
+            Contracts.AssertValue(a);
+            Contracts.Assert(a.Size > 0);
+            return a.CbAlign == CbAlign;
+        }
+
+        private unsafe static float* Ptr(AlignedArray a, float* p)
+        {
+            Contracts.AssertValue(a);
+            float* q = p + a.GetBase((long)p);
+            Contracts.Assert(((long)q & (CbAlign - 1)) == 0);
+            return q;
+        }
+
+        [DllImport(NativePath), SuppressUnmanagedCodeSecurity]
+        public static extern void CalculateIntermediateVariablesNative(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices,
+            float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response);
+
+        [DllImport(NativePath), SuppressUnmanagedCodeSecurity]
+        public static extern void CalculateGradientAndUpdateNative(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight,
+            int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope,
+            float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads);
+
+        public static void CalculateIntermediateVariables(int fieldCount, int latentDim, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues,
+            float[] linearWeights, AlignedArray latentWeights, AlignedArray latentSum, ref float response)
+        {
+            Contracts.AssertNonEmpty(fieldIndices);
+            Contracts.AssertNonEmpty(featureValues);
+            Contracts.AssertNonEmpty(featureIndices);
+            Contracts.AssertNonEmpty(linearWeights);
+            Contracts.Assert(Compat(latentWeights));
+            Contracts.Assert(Compat(latentSum));
+
+            unsafe
+            {
+                fixed (int* pf = &fieldIndices[0])
+                fixed (int* pi = &featureIndices[0])
+                fixed (float* px = &featureValues[0])
+                fixed (float* pw = &linearWeights[0])
+                fixed (float* pv = &latentWeights.Items[0])
+                fixed (float* pq = &latentSum.Items[0])
+                fixed (float* pr = &response)
+                    CalculateIntermediateVariablesNative(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr);
+            }
+        }
+
+        public static void CalculateGradientAndUpdate(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim,
+            float weight, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues, AlignedArray latentSum, float slope,
+            float[] linearWeights, AlignedArray latentWeights, float[] linearAccumulatedSquaredGrads, AlignedArray latentAccumulatedSquaredGrads)
+        {
+            Contracts.AssertNonEmpty(fieldIndices);
+            Contracts.AssertNonEmpty(featureIndices);
+            Contracts.AssertNonEmpty(featureValues);
+            Contracts.Assert(Compat(latentSum));
+            Contracts.AssertNonEmpty(linearWeights);
+            Contracts.Assert(Compat(latentWeights));
+            Contracts.AssertNonEmpty(linearAccumulatedSquaredGrads);
+            Contracts.Assert(Compat(latentAccumulatedSquaredGrads));
+
+            unsafe
+            {
+                fixed (int* pf = &fieldIndices[0])
+                fixed (int* pi = &featureIndices[0])
+                fixed (float* px = &featureValues[0])
+                fixed (float* pq = &latentSum.Items[0])
+                fixed (float* pw = &linearWeights[0])
+                fixed (float* pv = &latentWeights.Items[0])
+                fixed (float* phw = &linearAccumulatedSquaredGrads[0])
+                fixed (float* phv = &latentAccumulatedSquaredGrads.Items[0])
+                    CalculateGradientAndUpdateNative(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px,
+                        Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv));
+            }
+        }
+    }
+}
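
The wrapper above follows the usual managed/native interop pattern: assert preconditions, pin every managed buffer with fixed so the GC cannot move it, then hand raw pointers to the DllImport entry point. A minimal self-contained sketch of that pattern, where the SumNative export and library name are hypothetical and purely for illustration:

    using System;
    using System.Runtime.InteropServices;

    internal static unsafe class InteropSketch
    {
        // Hypothetical native export; its C++ side would be
        // extern "C" float SumNative(const float* values, int count).
        [DllImport("HypotheticalNative")]
        private static extern float SumNative(float* values, int count);

        public static float Sum(float[] values)
        {
            if (values == null || values.Length == 0)
                throw new ArgumentException("values must be non-empty.", nameof(values));

            // 'fixed' pins the array for the duration of the call so the
            // native code can safely read through the raw pointer.
            fixed (float* p = &values[0])
                return SumNative(p, values.Length);
        }
    }

As with the interface above, such code only compiles when unsafe blocks are allowed, which is exactly what the csproj change below enables.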

src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>
     <IncludeInPackage>Microsoft.ML</IncludeInPackage>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
 
   <ItemGroup>
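
The new AllowUnsafeBlocks property is required because FieldAwareFactorizationMachineInterface uses an unsafe class, pointer types, and fixed statements; without it the C# compiler rejects the project.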

src/Native/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -179,4 +179,5 @@ function(install_library_and_symbols targetName)
 endfunction()
 
 add_subdirectory(CpuMathNative)
-add_subdirectory(FastTreeNative)
+add_subdirectory(FastTreeNative)
+add_subdirectory(FactorizationMachineNative)
src/Native/FactorizationMachineNative/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+cmake_minimum_required (VERSION 3.2)
+project (FactorizationMachineNative)
+
+set(SOURCES
+    FactorizationMachineCore.cpp
+)
+
+if(WIN32)
+else()
+    set_property(SOURCE segment.cpp APPEND_STRING PROPERTY COMPILE_FLAGS " -msse4.1")
+    list(APPEND SOURCES ${VERSION_FILE_PATH})
+endif()
+
+add_library(FactorizationMachineNative SHARED ${SOURCES} ${RESOURCES})
+
+install_library_and_symbols (FactorizationMachineNative)
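
Note that segment.cpp, which the non-Windows branch tags with -msse4.1, is not in this project's SOURCES list (only FactorizationMachineCore.cpp is), so that set_property line appears to be carried over from another native project's CMakeLists and has no effect here.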
src/Native/FactorizationMachineNative/FactorizationMachineCore.cpp

Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <pmmintrin.h>
+
+#define UNUSED(x) (void)(x)
+#define DEBUG_ONLY(x) (void)(x)
+
+#ifdef COMPILER_GCC
+
+#include "UnixSal.h"
+#define EXPORT_API(ret) extern "C" __attribute__((visibility("default"))) ret
+
+#else
+#include <intrin.h>
+#define EXPORT_API(ret) extern "C" __declspec(dllexport) ret __stdcall
+#endif
+
+EXPORT_API(void) CalculateIntermediateVariablesNative(int fieldCount, int latentDim, int count, _In_ int * fieldIndices, _In_ int * featureIndices, _In_ float * featureValues,
+    _In_ float * linearWeights, _In_ float * latentWeights, _Inout_ float * latentSum, _Out_ float * response)
+{
+    const int m = fieldCount;
+    const int d = latentDim;
+    const int c = count;
+    const int * pf = fieldIndices;
+    const int * pi = featureIndices;
+    const float * px = featureValues;
+    const float * pw = linearWeights;
+    const float * pv = latentWeights;
+    float * pq = latentSum;
+    float linearResponse = 0;
+    float latentResponse = 0;
+
+    memset(pq, 0, sizeof(float) * m * m * d);
+    __m128 _y = _mm_setzero_ps();
+    __m128 _tmp = _mm_setzero_ps();
+
+    for (int i = 0; i < c; i++)
+    {
+        const int f = pf[i];
+        const int j = pi[i];
+        linearResponse += pw[j] * px[i];
+
+        const __m128 _x = _mm_load1_ps(px + i);
+        const __m128 _xx = _mm_mul_ps(_x, _x);
+
+        // tmp -= <v_j,f, v_j,f> * x * x
+        const int vBias = j * m * d + f * d;
+        const float * vjf = pv + vBias;
+        for (int k = 0; k + 4 <= d; k += 4)
+        {
+            const __m128 _v = _mm_load_ps(vjf + k);
+            _tmp = _mm_sub_ps(_tmp, _mm_mul_ps(_mm_mul_ps(_v, _v), _xx));
+        }
+
+        for (int fprime = 0; fprime < m; fprime++)
+        {
+            const int vBias = j * m * d + fprime * d;
+            const int qBias = f * m * d + fprime * d;
+            const float * vjfprime = pv + vBias;
+            float * qffprime = pq + qBias;
+
+            // q_f,f' += v_j,f' * x
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                const __m128 _v = _mm_load_ps(vjfprime + k);
+                __m128 _q = _mm_load_ps(qffprime + k);
+                _q = _mm_add_ps(_q, _mm_mul_ps(_v, _x));
+                _mm_store_ps(qffprime + k, _q);
+            }
+        }
+    }
+
+    for (int f = 0; f < m; f++)
+    {
+        // tmp += <q_f,f, q_f,f>
+        const float * qff = pq + f * m * d + f * d;
+        for (int k = 0; k + 4 <= d; k += 4)
+        {
+            __m128 _qff = _mm_load_ps(qff + k);
+            _tmp = _mm_add_ps(_tmp, _mm_mul_ps(_qff, _qff));
+        }
+
+        // y += <q_f,f', q_f',f>, f != f'
+        for (int fprime = f + 1; fprime < m; fprime++)
+        {
+            const float * qffprime = pq + f * m * d + fprime * d;
+            const float * qfprimef = pq + fprime * m * d + f * d;
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                __m128 _qffprime = _mm_load_ps(qffprime + k);
+                __m128 _qfprimef = _mm_load_ps(qfprimef + k);
+                _y = _mm_add_ps(_y, _mm_mul_ps(_qffprime, _qfprimef));
+            }
+        }
+    }
+
+    _y = _mm_add_ps(_y, _mm_mul_ps(_mm_set_ps1(0.5f), _tmp));
+    _tmp = _mm_add_ps(_y, _mm_movehl_ps(_y, _y));
+    _y = _mm_add_ps(_tmp, _mm_shuffle_ps(_tmp, _tmp, 1)); // the lowest slot is the response value
+    _mm_store_ss(&latentResponse, _y);
+    *response = linearResponse + latentResponse;
+}
+
+EXPORT_API(void) CalculateGradientAndUpdateNative(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, int count,
+    _In_ int* /*const*/ fieldIndices, _In_ int* /*const*/ featureIndices, _In_ float* /*const*/ featureValues, _In_ float* /*const*/ latentSum, float slope,
+    _Inout_ float* linearWeights, _Inout_ float* latentWeights, _Inout_ float* linearAccumulatedSquaredGrads, _Inout_ float* latentAccumulatedSquaredGrads)
+{
+    const int m = fieldCount;
+    const int d = latentDim;
+    const int c = count;
+    const int * pf = fieldIndices;
+    const int * pi = featureIndices;
+    const float * px = featureValues;
+    const float * pq = latentSum;
+    float * pw = linearWeights;
+    float * pv = latentWeights;
+    float * phw = linearAccumulatedSquaredGrads;
+    float * phv = latentAccumulatedSquaredGrads;
+
+    const __m128 _wei = _mm_set_ps1(weight);
+    const __m128 _s = _mm_set_ps1(slope);
+    const __m128 _lr = _mm_set_ps1(learningRate);
+    const __m128 _lambdav = _mm_set_ps1(lambdaLatent);
+
+    for (int i = 0; i < count; i++)
+    {
+        const int f = pf[i];
+        const int j = pi[i];
+
+        // update linear term w_j
+        float g = weight * (lambdaLinear * pw[j] + slope * px[i]);
+        phw[j] += g * g;
+        pw[j] -= learningRate / sqrt(phw[j]) * g;
+
+        // update latent term, v_j,f', f'=1,...,m
+        const __m128 _x = _mm_load1_ps(px + i);
+        for (int fprime = 0; fprime < m; fprime++)
+        {
+            float * vjfprime = pv + j * m * d + fprime * d;
+            float * hvjfprime = phv + j * m * d + fprime * d;
+            const float * qfprimef = pq + fprime * m * d + f * d;
+            const __m128 _sx = _mm_mul_ps(_s, _x);
+
+            for (int k = 0; k + 4 <= d; k += 4)
+            {
+                __m128 _v = _mm_load_ps(vjfprime + k);
+                __m128 _q = _mm_load_ps(qfprimef + k);
+                __m128 _g = _mm_mul_ps(_lambdav, _v);
+                if (fprime != f)
+                    _g = _mm_add_ps(_g, _mm_mul_ps(_sx, _q));
+                else
+                    _g = _mm_add_ps(_g, _mm_mul_ps(_sx, _mm_sub_ps(_q, _mm_mul_ps(_v, _x))));
+                _g = _mm_mul_ps(_wei, _g);
+
+                const __m128 _h = _mm_add_ps(_mm_load_ps(hvjfprime + k), _mm_mul_ps(_g, _g));
+                _v = _mm_sub_ps(_v, _mm_mul_ps(_lr, _mm_mul_ps(_mm_rsqrt_ps(_h), _g)));
+                _mm_store_ps(vjfprime + k, _v);
+                _mm_store_ps(hvjfprime + k, _h);
+            }
+        }
+    }
+}
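
For reference (not part of the commit), the two exports compute the standard field-aware factorization machine (FFM) response and its AdaGrad update. In the notation of the FFM literature, with f(j) the field of feature j, the first routine evaluates

\[
\hat{y}(\mathbf{x}) = \sum_{j} w_j x_j + \sum_{j_1 < j_2} \langle \mathbf{v}_{j_1, f(j_2)},\, \mathbf{v}_{j_2, f(j_1)} \rangle\, x_{j_1} x_{j_2},
\]

caching in latentSum the per-field latent sums

\[
\mathbf{q}_{f,f'} = \sum_{j :\, f(j) = f} \mathbf{v}_{j,f'}\, x_j ,
\]

so the pairwise term reduces to

\[
\sum_{f < f'} \langle \mathbf{q}_{f,f'},\, \mathbf{q}_{f',f} \rangle
+ \frac{1}{2} \sum_{f} \Big( \langle \mathbf{q}_{f,f},\, \mathbf{q}_{f,f} \rangle
- \sum_{j :\, f(j) = f} \langle \mathbf{v}_{j,f},\, \mathbf{v}_{j,f} \rangle\, x_j^2 \Big),
\]

which is exactly the _tmp/_y accumulation above. The second routine applies an AdaGrad step to each parameter \(\theta\) with (regularized, weighted) gradient \(g\):

\[
h \leftarrow h + g^2, \qquad \theta \leftarrow \theta - \frac{\eta\, g}{\sqrt{h}},
\]

matching phw[j] += g * g; pw[j] -= learningRate / sqrt(phw[j]) * g for the linear weights, with the _mm_rsqrt_ps path doing the same for the latent weights four lanes at a time.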

src/Native/build.proj

Lines changed: 2 additions & 1 deletion
@@ -71,9 +71,10 @@
   <ItemGroup>
     <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)CpuMathNative$(NativeLibExtension)"
                         RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
-
     <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)FastTreeNative$(NativeLibExtension)"
                         RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
+    <NativePackageAsset Include="$(NativeAssetsBuiltPath)\$(NativeLibPrefix)FactorizationMachineNative$(NativeLibExtension)"
+                        RelativePath="Microsoft.ML\runtimes\$(PackageRid)\native" />
   </ItemGroup>
 
   <ItemGroup>
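
Registering the new binary as a NativePackageAsset ships it under Microsoft.ML\runtimes\$(PackageRid)\native in the package, alongside the existing CpuMathNative and FastTreeNative libraries, so the NativePath lookup in the managed wrapper can resolve it at run time.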
