Skip to content

Commit 35344df

Browse files
rickardp authored and wkpark committed
Make native code portable
* minimal macos fixes Signed-off-by: Won-Kyu Park <[email protected]>
1 parent 5c0ba85 commit 35344df

File tree

5 files changed

+83
-33
lines changed

5 files changed

+83
-33
lines changed

include/Algo-Direct-Common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ struct DirectInfo
190190
xi = xws;
191191
}
192192
else {
193-
myassert(Gap==1, "if Gap>1 then X workspace must be provided");
193+
myassert((Gap==1), "if Gap>1 then X workspace must be provided");
194194
xi = x;
195195
}
196196

include/Algo-Direct2.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ struct AlgoVecBase<I, T, A, typename std::enable_if<DirectAux::IsDirect2<A>::val
5252
private:
5353
typedef AlgoScalarBase<T, A> base_t;
5454

55+
#ifdef USE_SSE2
5556
FORCE_INLINE
5657
//NO_INLINE
5758
void resolve(const FVec<SSE, float>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
@@ -135,6 +136,7 @@ struct AlgoVecBase<I, T, A, typename std::enable_if<DirectAux::IsDirect2<A>::val
135136
pr[0] = u.ui32[0];
136137
pr[1] = u.ui32[2];
137138
}
139+
#endif // USE_SSE2
138140

139141
#ifdef USE_AVX
140142

include/Portable.h

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,40 @@
44
#include <stdexcept>
55
#include <sstream>
66

7+
#if defined(__aarch64__)
8+
#ifdef __CUDACC__
9+
#undef USE_NEON // Doesn't work with nvcc, undefined symbols
10+
#else
11+
#include <arm_neon.h>
12+
#undef USE_NEON // Not yet implemented
13+
#endif
14+
#undef USE_AVX // x86_64 only
15+
#undef USE_AVX2 // x86_64 only
16+
#undef USE_SSE2 // x86_64 only
17+
#undef USE_SSE41 // x86_64 only
18+
#undef USE_SSE42 // x86_64 only
19+
#undef USE_FMA // x86_64 only
20+
#ifdef USE_NEON
21+
typedef float32x4_t __m128;
22+
typedef int32x4_t __m128i;
23+
typedef float64x2_t __m128d;
24+
#else
25+
typedef struct {float a; float b; float c; float d;} __m128;
26+
typedef struct {int a; int b; int c; int d;} __m128i;
27+
typedef struct {double a; double b;} __m128d;
28+
#endif
29+
#else
30+
#undef USE_NEON // ARM64 only
731
#ifdef __FMA__
832
#define USE_FMA
933
#endif
34+
#if !defined(__SSE2__) && !defined(_MSC_VER)
35+
#error Compiler must support SSE2
36+
#endif
37+
#define USE_SSE2
1038

39+
#if defined(__aarch64__)
40+
#else
1141
#ifdef __AVX2__
1242
#define USE_AVX2
1343
#endif
@@ -24,7 +54,8 @@
2454
#ifdef __SSE4_2__
2555
#define USE_SSE42
2656
#endif
27-
57+
#endif
58+
#endif
2859

2960
#ifndef _MSC_VER
3061
#include <stdint.h>

include/SIMD.h

Lines changed: 47 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,46 @@
22

33
#include "Portable.h"
44

5+
#ifdef USE_SSE2
6+
#include <emmintrin.h>
7+
#if defined(USE_AVX) || defined(USE_AVX2)
8+
#include <immintrin.h>
9+
#else
10+
#ifdef USE_SSE41
11+
#include <smmintrin.h>
12+
#endif
13+
#endif
14+
#endif
15+
16+
namespace BinSearch {
17+
namespace Details {
18+
19+
template <InstrSet I, typename T>
20+
struct FTOITraits{};
21+
22+
template <InstrSet I, class T>
23+
struct FVec;
24+
25+
template <InstrSet I, class T>
26+
struct IVec;
27+
28+
template <InstrSet I, class T>
29+
struct FVec1;
30+
31+
template <> struct InstrFloatTraits<Scalar, float>
32+
{
33+
typedef __m128 vec_t;
34+
};
35+
36+
template <> struct InstrFloatTraits<Scalar, double>
37+
{
38+
typedef __m128d vec_t;
39+
};
40+
41+
}
42+
}
43+
44+
#if !defined(__aarch64__)
545
#ifdef USE_SSE42
646
#ifndef _MSC_VER
747
#include <popcntintrin.h>
@@ -26,29 +66,11 @@ FORCE_INLINE int popcnt32(int x32)
2666
} // namespace
2767
#endif
2868

29-
#if defined(USE_AVX) || defined(USE_AVX2)
30-
#include <immintrin.h>
31-
#else
32-
#include <emmintrin.h>
33-
#ifdef USE_SSE41
34-
#include <smmintrin.h>
35-
#endif
36-
#endif
37-
3869
#include "Type.h"
3970

4071
namespace BinSearch {
4172
namespace Details {
4273

43-
template <InstrSet I, class T>
44-
struct FVec;
45-
46-
template <InstrSet I, class T>
47-
struct IVec;
48-
49-
template <InstrSet I, class T>
50-
struct FVec1;
51-
5274
template <> struct InstrIntTraits<SSE>
5375
{
5476
typedef __m128i vec_t;
@@ -64,18 +86,8 @@ template <> struct InstrFloatTraits<SSE, double>
6486
typedef __m128d vec_t;
6587
};
6688

67-
template <> struct InstrFloatTraits<Scalar, float>
68-
{
69-
typedef float vec_t;
70-
};
71-
72-
template <> struct InstrFloatTraits<Scalar, double>
73-
{
74-
typedef double vec_t;
75-
};
76-
77-
template <InstrSet I, typename T>
78-
struct FTOITraits
89+
template <>
90+
struct FTOITraits<SSE, float>
7991
{
8092
typedef IVec<SSE, float> vec_t;
8193
};
@@ -295,9 +307,11 @@ FORCE_INLINE FVec<SSE,float> operator- (const FVec<SSE,float>& a, const FVec<
295307
FORCE_INLINE FVec<SSE,float> operator* (const FVec<SSE,float>& a, const FVec<SSE,float>& b) { return _mm_mul_ps( a, b ); }
296308
FORCE_INLINE FVec<SSE,float> operator/ (const FVec<SSE,float>& a, const FVec<SSE,float>& b) { return _mm_div_ps( a, b ); }
297309
FORCE_INLINE IVec<SSE,float> ftoi (const FVec<SSE,float>& a) { return _mm_cvttps_epi32(a); }
310+
#ifndef __clang__ // Conflicts with builtin operator
298311
FORCE_INLINE IVec<SSE,float> operator<= (const FVec<SSE,float>& a, const FVec<SSE,float>& b) { return _mm_castps_si128( _mm_cmple_ps( a, b ) ); }
299312
FORCE_INLINE IVec<SSE,float> operator>= (const FVec<SSE,float>& a, const FVec<SSE,float>& b) { return _mm_castps_si128( _mm_cmpge_ps( a, b ) ); }
300313
FORCE_INLINE IVec<SSE,float> operator< (const FVec<SSE,float>& a, const FVec<SSE,float>& b) { return _mm_castps_si128(_mm_cmplt_ps(a, b)); }
314+
#endif
301315
#ifdef USE_FMA
302316
FORCE_INLINE FVec<SSE, float> mulSub(const FVec<SSE, float>& a, const FVec<SSE, float>& b, const FVec<SSE, float>& c) { return _mm_fmsub_ps(a, b, c); }
303317
#endif
@@ -349,9 +363,11 @@ FORCE_INLINE FVec<SSE,double> operator- (const FVec<SSE,double>& a, const FVec
349363
FORCE_INLINE FVec<SSE,double> operator* (const FVec<SSE,double>& a, const FVec<SSE,double>& b) { return _mm_mul_pd( a, b ); }
350364
FORCE_INLINE FVec<SSE,double> operator/ (const FVec<SSE,double>& a, const FVec<SSE,double>& b) { return _mm_div_pd( a, b ); }
351365
FORCE_INLINE IVec<SSE,float> ftoi (const FVec<SSE,double>& a) { return _mm_cvttpd_epi32(a); }
366+
#ifndef __clang__ // Conflicts with builtin operator
352367
FORCE_INLINE IVec<SSE,double> operator<= (const FVec<SSE,double>& a, const FVec<SSE,double>& b) { return _mm_castpd_si128( _mm_cmple_pd( a, b ) ); }
353368
FORCE_INLINE IVec<SSE,double> operator< (const FVec<SSE,double>& a, const FVec<SSE,double>& b) { return _mm_castpd_si128(_mm_cmplt_pd(a, b)); }
354369
FORCE_INLINE IVec<SSE,double> operator>= (const FVec<SSE,double>& a, const FVec<SSE,double>& b) { return _mm_castpd_si128( _mm_cmpge_pd( a, b ) ); }
370+
#endif
355371
#ifdef USE_FMA
356372
FORCE_INLINE FVec<SSE, double> mulSub(const FVec<SSE, double>& a, const FVec<SSE, double>& b, const FVec<SSE, double>& c ) { return _mm_fmsub_pd(a, b, c); }
357373
#endif
@@ -570,3 +586,4 @@ FORCE_INLINE FVec<AVX, double> mulSub(const FVec<AVX, double>& a, const FVec<AVX
570586

571587
} // namespace Details
572588
} // namespace BinSearch
589+
#endif // !defined(__aarch64__)

include/Type.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ using std::size_t;
1010

1111
namespace BinSearch {
1212

13-
enum InstrSet { Scalar, SSE, AVX };
13+
enum InstrSet { Scalar, SSE, AVX, Neon };
1414

1515
#define ALGOENUM(x, b) x,
1616
enum Algos

0 commit comments

Comments
 (0)