2
2
3
3
#include " Portable.h"
4
4
5
+ #ifdef USE_SSE2
6
+ #include < emmintrin.h>
7
+ #if defined(USE_AVX) || defined(USE_AVX2)
8
+ #include < immintrin.h>
9
+ #else
10
+ #ifdef USE_SSE41
11
+ #include < smmintrin.h>
12
+ #endif
13
+ #endif
14
+ #endif
15
+
16
+ namespace BinSearch {
17
+ namespace Details {
18
+
19
+ template <InstrSet I, typename T>
20
+ struct FTOITraits {};
21
+
22
+ template <InstrSet I, class T >
23
+ struct FVec ;
24
+
25
+ template <InstrSet I, class T >
26
+ struct IVec ;
27
+
28
+ template <InstrSet I, class T >
29
+ struct FVec1 ;
30
+
31
+ template <> struct InstrFloatTraits <Scalar, float >
32
+ {
33
+ typedef __m128 vec_t ;
34
+ };
35
+
36
+ template <> struct InstrFloatTraits <Scalar, double >
37
+ {
38
+ typedef __m128d vec_t ;
39
+ };
40
+
41
+ }
42
+ }
43
+
44
+ #if !defined(__aarch64__)
5
45
#ifdef USE_SSE42
6
46
#ifndef _MSC_VER
7
47
#include < popcntintrin.h>
@@ -26,29 +66,11 @@ FORCE_INLINE int popcnt32(int x32)
26
66
} // namespace
27
67
#endif
28
68
29
- #if defined(USE_AVX) || defined(USE_AVX2)
30
- #include < immintrin.h>
31
- #else
32
- #include < emmintrin.h>
33
- #ifdef USE_SSE41
34
- #include < smmintrin.h>
35
- #endif
36
- #endif
37
-
38
69
#include " Type.h"
39
70
40
71
namespace BinSearch {
41
72
namespace Details {
42
73
43
- template <InstrSet I, class T >
44
- struct FVec ;
45
-
46
- template <InstrSet I, class T >
47
- struct IVec ;
48
-
49
- template <InstrSet I, class T >
50
- struct FVec1 ;
51
-
52
74
template <> struct InstrIntTraits <SSE>
53
75
{
54
76
typedef __m128i vec_t ;
@@ -64,18 +86,8 @@ template <> struct InstrFloatTraits<SSE, double>
64
86
typedef __m128d vec_t ;
65
87
};
66
88
67
- template <> struct InstrFloatTraits <Scalar, float >
68
- {
69
- typedef float vec_t ;
70
- };
71
-
72
- template <> struct InstrFloatTraits <Scalar, double >
73
- {
74
- typedef double vec_t ;
75
- };
76
-
77
- template <InstrSet I, typename T>
78
- struct FTOITraits
89
+ template <>
90
+ struct FTOITraits <SSE, float >
79
91
{
80
92
typedef IVec<SSE, float > vec_t ;
81
93
};
@@ -295,9 +307,11 @@ FORCE_INLINE FVec<SSE,float> operator- (const FVec<SSE,float>& a, const FVec<
295
307
// Product of two SSE float vectors; forwards both operands to _mm_mul_ps.
FORCE_INLINE FVec<SSE, float> operator*(const FVec<SSE, float>& a, const FVec<SSE, float>& b)
{
    return _mm_mul_ps(a, b);
}
296
308
// Quotient of two SSE float vectors; forwards both operands to _mm_div_ps.
FORCE_INLINE FVec<SSE, float> operator/(const FVec<SSE, float>& a, const FVec<SSE, float>& b)
{
    return _mm_div_ps(a, b);
}
297
309
// Float-to-int conversion of an SSE float vector using the truncating
// intrinsic _mm_cvttps_epi32; the result is wrapped as IVec<SSE, float>.
FORCE_INLINE IVec<SSE, float> ftoi(const FVec<SSE, float>& a)
{
    return _mm_cvttps_epi32(a);
}
310
+ #ifndef __clang__ // Conflicts with builtin operator
298
311
// a <= b on SSE float vectors: the _mm_cmple_ps result is reinterpreted as an
// integer vector with _mm_castps_si128 and returned as IVec<SSE, float>.
FORCE_INLINE IVec<SSE, float> operator<=(const FVec<SSE, float>& a, const FVec<SSE, float>& b)
{
    return _mm_castps_si128(
        _mm_cmple_ps(a, b));
}
299
312
// a >= b on SSE float vectors: the _mm_cmpge_ps result is reinterpreted as an
// integer vector with _mm_castps_si128 and returned as IVec<SSE, float>.
FORCE_INLINE IVec<SSE, float> operator>=(const FVec<SSE, float>& a, const FVec<SSE, float>& b)
{
    return _mm_castps_si128(
        _mm_cmpge_ps(a, b));
}
300
313
// a < b on SSE float vectors: the _mm_cmplt_ps result is reinterpreted as an
// integer vector with _mm_castps_si128 and returned as IVec<SSE, float>.
FORCE_INLINE IVec<SSE, float> operator<(const FVec<SSE, float>& a, const FVec<SSE, float>& b)
{
    return _mm_castps_si128(
        _mm_cmplt_ps(a, b));
}
314
+ #endif
301
315
#ifdef USE_FMA
302
316
// Fused multiply-subtract on SSE float vectors: hands (a, b, c) to
// _mm_fmsub_ps, i.e. a * b - c per the intrinsic's definition.
FORCE_INLINE FVec<SSE, float> mulSub(const FVec<SSE, float>& a,
                                     const FVec<SSE, float>& b,
                                     const FVec<SSE, float>& c)
{
    return _mm_fmsub_ps(a, b, c);
}
303
317
#endif
@@ -349,9 +363,11 @@ FORCE_INLINE FVec<SSE,double> operator- (const FVec<SSE,double>& a, const FVec
349
363
// Product of two SSE double vectors; forwards both operands to _mm_mul_pd.
FORCE_INLINE FVec<SSE, double> operator*(const FVec<SSE, double>& a, const FVec<SSE, double>& b)
{
    return _mm_mul_pd(a, b);
}
350
364
// Quotient of two SSE double vectors; forwards both operands to _mm_div_pd.
FORCE_INLINE FVec<SSE, double> operator/(const FVec<SSE, double>& a, const FVec<SSE, double>& b)
{
    return _mm_div_pd(a, b);
}
351
365
// Double-to-int conversion of an SSE double vector using the truncating
// intrinsic _mm_cvttpd_epi32. Note the return type is IVec<SSE, float>,
// not IVec<SSE, double>.
FORCE_INLINE IVec<SSE, float> ftoi(const FVec<SSE, double>& a)
{
    return _mm_cvttpd_epi32(a);
}
366
+ #ifndef __clang__ // Conflicts with builtin operator
352
367
// a <= b on SSE double vectors: the _mm_cmple_pd result is reinterpreted as an
// integer vector with _mm_castpd_si128 and returned as IVec<SSE, double>.
FORCE_INLINE IVec<SSE, double> operator<=(const FVec<SSE, double>& a, const FVec<SSE, double>& b)
{
    return _mm_castpd_si128(
        _mm_cmple_pd(a, b));
}
353
368
// a < b on SSE double vectors: the _mm_cmplt_pd result is reinterpreted as an
// integer vector with _mm_castpd_si128 and returned as IVec<SSE, double>.
FORCE_INLINE IVec<SSE, double> operator<(const FVec<SSE, double>& a, const FVec<SSE, double>& b)
{
    return _mm_castpd_si128(
        _mm_cmplt_pd(a, b));
}
354
369
// a >= b on SSE double vectors: the _mm_cmpge_pd result is reinterpreted as an
// integer vector with _mm_castpd_si128 and returned as IVec<SSE, double>.
FORCE_INLINE IVec<SSE, double> operator>=(const FVec<SSE, double>& a, const FVec<SSE, double>& b)
{
    return _mm_castpd_si128(
        _mm_cmpge_pd(a, b));
}
370
+ #endif
355
371
#ifdef USE_FMA
356
372
// Fused multiply-subtract on SSE double vectors: hands (a, b, c) to
// _mm_fmsub_pd, i.e. a * b - c per the intrinsic's definition.
FORCE_INLINE FVec<SSE, double> mulSub(const FVec<SSE, double>& a,
                                      const FVec<SSE, double>& b,
                                      const FVec<SSE, double>& c)
{
    return _mm_fmsub_pd(a, b, c);
}
357
373
#endif
@@ -570,3 +586,4 @@ FORCE_INLINE FVec<AVX, double> mulSub(const FVec<AVX, double>& a, const FVec<AVX
570
586
571
587
} // namespace Details
572
588
} // namespace BinSearch
589
+ #endif // !defined(__aarch64__)
0 commit comments