Skip to content

Commit fc0fc76

Browse files
authored
[X86] Finally handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx (llvm#84136)
This patch relands llvm#67410 and fixes the cmpfail below: #include <immintrin.h> __attribute__((target("avx"))) void test(__m128 a, __m128 b) { _mm_cmp_ps(a, b, 14); } According to Intel SDM, SSE/SSE2 instructions cmp[p|s][s|d] are supported when imm8 is in range of [0, 7]
1 parent c58c827 commit fc0fc76

11 files changed

+316
-121
lines changed

clang/include/clang/Basic/BuiltinsX86.def

+4-4
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
226226
TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
227227
TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
228228
TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
229+
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse")
230+
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse")
229231

230232
TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
231233
TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -243,6 +245,8 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
243245
TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
244246
TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
245247
TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
248+
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2")
249+
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2")
246250
TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
247251
TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
248252
TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -462,12 +466,8 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
462466
TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
463467
TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
464468
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
465-
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
466469
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
467-
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
468470
TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
469-
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
470-
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
471471
TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
472472
TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
473473
TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")

clang/lib/CodeGen/CodeGenFunction.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "clang/AST/StmtObjC.h"
3232
#include "clang/Basic/Builtins.h"
3333
#include "clang/Basic/CodeGenOptions.h"
34+
#include "clang/Basic/TargetBuiltins.h"
3435
#include "clang/Basic/TargetInfo.h"
3536
#include "clang/CodeGen/CGFunctionInfo.h"
3637
#include "clang/Frontend/FrontendDiagnostic.h"
@@ -2613,6 +2614,24 @@ void CGBuilderInserter::InsertHelper(
26132614
// called function.
26142615
void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
26152616
const FunctionDecl *TargetDecl) {
2617+
// SemaChecking cannot handle below x86 builtins because they have different
2618+
// parameter ranges with different TargetAttribute of caller.
2619+
if (CGM.getContext().getTargetInfo().getTriple().isX86()) {
2620+
unsigned BuiltinID = TargetDecl->getBuiltinID();
2621+
if (BuiltinID == X86::BI__builtin_ia32_cmpps ||
2622+
BuiltinID == X86::BI__builtin_ia32_cmpss ||
2623+
BuiltinID == X86::BI__builtin_ia32_cmppd ||
2624+
BuiltinID == X86::BI__builtin_ia32_cmpsd) {
2625+
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
2626+
llvm::StringMap<bool> TargetFetureMap;
2627+
CGM.getContext().getFunctionFeatureMap(TargetFetureMap, FD);
2628+
llvm::APSInt Result =
2629+
*(E->getArg(2)->getIntegerConstantExpr(CGM.getContext()));
2630+
if (Result.getSExtValue() > 7 && !TargetFetureMap.lookup("avx"))
2631+
CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature)
2632+
<< TargetDecl->getDeclName() << "avx";
2633+
}
2634+
}
26162635
return checkTargetFeatures(E->getBeginLoc(), TargetDecl);
26172636
}
26182637

clang/lib/Headers/avxintrin.h

+8-20
Original file line numberDiff line numberDiff line change
@@ -1574,14 +1574,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
15741574
(__v4df)(__m256d)(b), (int)(mask)))
15751575

15761576
/* Compare */
1577-
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
1578-
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
1579-
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
1580-
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
1581-
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
1582-
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
1583-
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
1584-
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
15851577
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
15861578
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
15871579
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
@@ -1607,6 +1599,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
16071599
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
16081600
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
16091601

1602+
/* Below intrinsic defined in emmintrin.h can be used for AVX */
16101603
/// Compares each of the corresponding double-precision values of two
16111604
/// 128-bit vectors of [2 x double], using the operation specified by the
16121605
/// immediate integer operand.
@@ -1663,10 +1656,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
16631656
/// 0x1E: Greater-than (ordered, non-signaling) \n
16641657
/// 0x1F: True (unordered, signaling)
16651658
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
1666-
#define _mm_cmp_pd(a, b, c) \
1667-
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
1668-
(__v2df)(__m128d)(b), (c)))
1659+
/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
16691660

1661+
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
16701662
/// Compares each of the corresponding values of two 128-bit vectors of
16711663
/// [4 x float], using the operation specified by the immediate integer
16721664
/// operand.
@@ -1723,9 +1715,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
17231715
/// 0x1E: Greater-than (ordered, non-signaling) \n
17241716
/// 0x1F: True (unordered, signaling)
17251717
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
1726-
#define _mm_cmp_ps(a, b, c) \
1727-
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
1728-
(__v4sf)(__m128)(b), (c)))
1718+
/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
17291719

17301720
/// Compares each of the corresponding double-precision values of two
17311721
/// 256-bit vectors of [4 x double], using the operation specified by the
@@ -1847,6 +1837,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
18471837
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
18481838
(__v8sf)(__m256)(b), (c)))
18491839

1840+
/* Below intrinsic defined in emmintrin.h can be used for AVX */
18501841
/// Compares each of the corresponding scalar double-precision values of
18511842
/// two 128-bit vectors of [2 x double], using the operation specified by the
18521843
/// immediate integer operand.
@@ -1902,10 +1893,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
19021893
/// 0x1E: Greater-than (ordered, non-signaling) \n
19031894
/// 0x1F: True (unordered, signaling)
19041895
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
1905-
#define _mm_cmp_sd(a, b, c) \
1906-
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
1907-
(__v2df)(__m128d)(b), (c)))
1896+
/// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
19081897

1898+
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
19091899
/// Compares each of the corresponding scalar values of two 128-bit
19101900
/// vectors of [4 x float], using the operation specified by the immediate
19111901
/// integer operand.
@@ -1961,9 +1951,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
19611951
/// 0x1E: Greater-than (ordered, non-signaling) \n
19621952
/// 0x1F: True (unordered, signaling)
19631953
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
1964-
#define _mm_cmp_ss(a, b, c) \
1965-
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
1966-
(__v4sf)(__m128)(b), (c)))
1954+
/// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
19671955

19681956
/// Takes a [8 x i32] vector and returns the vector element value
19691957
/// indexed by the immediate constant operand.

clang/lib/Headers/emmintrin.h

+68
Original file line numberDiff line numberDiff line change
@@ -4745,6 +4745,74 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) {
47454745
return (__m128d)__a;
47464746
}
47474747

4748+
/// Compares each of the corresponding double-precision values of two
4749+
/// 128-bit vectors of [2 x double], using the operation specified by the
4750+
/// immediate integer operand.
4751+
///
4752+
/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
4753+
///
4754+
/// \headerfile <x86intrin.h>
4755+
///
4756+
/// \code
4757+
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
4758+
/// \endcode
4759+
///
4760+
/// This intrinsic corresponds to the <c> (V)CMPPD </c> instruction.
4761+
///
4762+
/// \param a
4763+
/// A 128-bit vector of [2 x double].
4764+
/// \param b
4765+
/// A 128-bit vector of [2 x double].
4766+
/// \param c
4767+
/// An immediate integer operand, with bits [4:0] specifying which comparison
4768+
/// operation to use: \n
4769+
/// 0x00: Equal (ordered, non-signaling) \n
4770+
/// 0x01: Less-than (ordered, signaling) \n
4771+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
4772+
/// 0x03: Unordered (non-signaling) \n
4773+
/// 0x04: Not-equal (unordered, non-signaling) \n
4774+
/// 0x05: Not-less-than (unordered, signaling) \n
4775+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
4776+
/// 0x07: Ordered (non-signaling) \n
4777+
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
4778+
#define _mm_cmp_pd(a, b, c) \
4779+
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
4780+
(c)))
4781+
4782+
/// Compares each of the corresponding scalar double-precision values of
4783+
/// two 128-bit vectors of [2 x double], using the operation specified by the
4784+
/// immediate integer operand.
4785+
///
4786+
/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
4787+
///
4788+
/// \headerfile <x86intrin.h>
4789+
///
4790+
/// \code
4791+
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
4792+
/// \endcode
4793+
///
4794+
/// This intrinsic corresponds to the <c> (V)CMPSD </c> instruction.
4795+
///
4796+
/// \param a
4797+
/// A 128-bit vector of [2 x double].
4798+
/// \param b
4799+
/// A 128-bit vector of [2 x double].
4800+
/// \param c
4801+
/// An immediate integer operand, with bits [4:0] specifying which comparison
4802+
/// operation to use: \n
4803+
/// 0x00: Equal (ordered, non-signaling) \n
4804+
/// 0x01: Less-than (ordered, signaling) \n
4805+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
4806+
/// 0x03: Unordered (non-signaling) \n
4807+
/// 0x04: Not-equal (unordered, non-signaling) \n
4808+
/// 0x05: Not-less-than (unordered, signaling) \n
4809+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
4810+
/// 0x07: Ordered (non-signaling) \n
4811+
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
4812+
#define _mm_cmp_sd(a, b, c) \
4813+
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
4814+
(c)))
4815+
47484816
#if defined(__cplusplus)
47494817
extern "C" {
47504818
#endif

clang/lib/Headers/xmmintrin.h

+75
Original file line numberDiff line numberDiff line change
@@ -2940,6 +2940,81 @@ _mm_movemask_ps(__m128 __a)
29402940
return __builtin_ia32_movmskps((__v4sf)__a);
29412941
}
29422942

2943+
/* Compare */
2944+
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
2945+
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
2946+
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
2947+
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
2948+
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
2949+
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
2950+
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
2951+
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
2952+
2953+
/// Compares each of the corresponding values of two 128-bit vectors of
2954+
/// [4 x float], using the operation specified by the immediate integer
2955+
/// operand.
2956+
///
2957+
/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
2958+
///
2959+
/// \headerfile <x86intrin.h>
2960+
///
2961+
/// \code
2962+
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
2963+
/// \endcode
2964+
///
2965+
/// This intrinsic corresponds to the <c> (V)CMPPS </c> instruction.
2966+
///
2967+
/// \param a
2968+
/// A 128-bit vector of [4 x float].
2969+
/// \param b
2970+
/// A 128-bit vector of [4 x float].
2971+
/// \param c
2972+
/// An immediate integer operand, with bits [4:0] specifying which comparison
2973+
/// operation to use: \n
2974+
/// 0x00: Equal (ordered, non-signaling) \n
2975+
/// 0x01: Less-than (ordered, signaling) \n
2976+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
2977+
/// 0x03: Unordered (non-signaling) \n
2978+
/// 0x04: Not-equal (unordered, non-signaling) \n
2979+
/// 0x05: Not-less-than (unordered, signaling) \n
2980+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
2981+
/// 0x07: Ordered (non-signaling) \n
2982+
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
2983+
#define _mm_cmp_ps(a, b, c) \
2984+
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
2985+
2986+
/// Compares each of the corresponding scalar values of two 128-bit
2987+
/// vectors of [4 x float], using the operation specified by the immediate
2988+
/// integer operand.
2989+
///
2990+
/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.
2991+
///
2992+
/// \headerfile <x86intrin.h>
2993+
///
2994+
/// \code
2995+
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
2996+
/// \endcode
2997+
///
2998+
/// This intrinsic corresponds to the <c> (V)CMPSS </c> instruction.
2999+
///
3000+
/// \param a
3001+
/// A 128-bit vector of [4 x float].
3002+
/// \param b
3003+
/// A 128-bit vector of [4 x float].
3004+
/// \param c
3005+
/// An immediate integer operand, with bits [4:0] specifying which comparison
3006+
/// operation to use: \n
3007+
/// 0x00: Equal (ordered, non-signaling) \n
3008+
/// 0x01: Less-than (ordered, signaling) \n
3009+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
3010+
/// 0x03: Unordered (non-signaling) \n
3011+
/// 0x04: Not-equal (unordered, non-signaling) \n
3012+
/// 0x05: Not-less-than (unordered, signaling) \n
3013+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
3014+
/// 0x07: Ordered (non-signaling) \n
3015+
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
3016+
#define _mm_cmp_ss(a, b, c) \
3017+
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
29433018

29443019
#define _MM_ALIGN16 __attribute__((aligned(16)))
29453020

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown-emit-llvm -o /dev/null -verify
2+
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown-emit-llvm -o /dev/null -verify
3+
4+
// expected-no-diagnostics
5+
6+
#include <immintrin.h>
7+
8+
__attribute__((target("avx")))
9+
__m128 test(__m128 a, __m128 b) {
10+
return _mm_cmp_ps(a, b, 14);
11+
}

0 commit comments

Comments
 (0)