Skip to content

Commit dbcef6f

Browse files
FreddyLeafcjdb
authored andcommitted
[X86][AVX10.2] Support AVX10.2-CONVERT new instructions. (llvm#101600)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
1 parent f11dc11 commit dbcef6f

24 files changed

+13806
-64
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,6 +2217,50 @@ TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512
22172217
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
22182218
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
22192219
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2220+
2221+
// AVX10.2 CONVERT
2222+
TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx128_mask, "V8xV4fV4fV8xUc", "ncV:128:", "avx10.2-256")
2223+
TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx256_mask, "V16xV8fV8fV16xUsIi", "ncV:256:", "avx10.2-256")
2224+
TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx512_mask, "V32xV16fV16fV32xUiIi", "ncV:512:", "avx10.2-512")
2225+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
2226+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
2227+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
2228+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
2229+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
2230+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
2231+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
2232+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
2233+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
2234+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
2235+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
2236+
TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
2237+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
2238+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
2239+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
2240+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
2241+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
2242+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
2243+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
2244+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
2245+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
2246+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
2247+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
2248+
TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
2249+
TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256")
2250+
TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256")
2251+
TARGET_BUILTIN(__builtin_ia32_vcvthf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512")
2252+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
2253+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
2254+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
2255+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
2256+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
2257+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
2258+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
2259+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
2260+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
2261+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
2262+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
2263+
TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
22202264
#undef BUILTIN
22212265
#undef TARGET_BUILTIN
22222266
#undef TARGET_HEADER_BUILTIN

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,11 @@ set(x86_files
147147
amxcomplexintrin.h
148148
amxfp16intrin.h
149149
amxintrin.h
150+
avx10_2_512convertintrin.h
150151
avx10_2_512minmaxintrin.h
151152
avx10_2_512niintrin.h
152153
avx10_2_512satcvtintrin.h
154+
avx10_2convertintrin.h
153155
avx10_2minmaxintrin.h
154156
avx10_2niintrin.h
155157
avx10_2satcvtintrin.h
Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
/*===--------- avx10_2_512convertintrin.h - AVX10_2_512CONVERT -------------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===-----------------------------------------------------------------------===
8+
*/
9+
#ifndef __IMMINTRIN_H
10+
#error \
11+
"Never use <avx10_2_512convertintrin.h> directly; include <immintrin.h> instead."
12+
#endif // __IMMINTRIN_H
13+
14+
#ifdef __SSE2__
15+
16+
#ifndef __AVX10_2_512CONVERTINTRIN_H
17+
#define __AVX10_2_512CONVERTINTRIN_H
18+
19+
/* Define the default attributes for the functions in this file. */
20+
#define __DEFAULT_FN_ATTRS512 \
21+
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
22+
__min_vector_width__(512)))
23+
24+
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A,
25+
__m512 __B) {
26+
return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
27+
(__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)(-1),
28+
_MM_FROUND_CUR_DIRECTION);
29+
}
30+
31+
static __inline__ __m512h __DEFAULT_FN_ATTRS512
32+
_mm512_mask_cvtx2ps_ph(__m512h __W, __mmask32 __U, __m512 __A, __m512 __B) {
33+
return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
34+
(__v16sf)__A, (__v16sf)__B, (__v32hf)__W, (__mmask32)__U,
35+
_MM_FROUND_CUR_DIRECTION);
36+
}
37+
38+
static __inline__ __m512h __DEFAULT_FN_ATTRS512
39+
_mm512_maskz_cvtx2ps_ph(__mmask32 __U, __m512 __A, __m512 __B) {
40+
return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
41+
(__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
42+
_MM_FROUND_CUR_DIRECTION);
43+
}
44+
45+
#define _mm512_cvtx_round2ps_ph(A, B, R) \
46+
((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \
47+
(__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_undefined_ph(), \
48+
(__mmask32)(-1), (const int)(R)))
49+
50+
#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
51+
((__m512h)__builtin_ia32_vcvt2ps2phx512_mask((__v16sf)(A), (__v16sf)(B), \
52+
(__v32hf)(W), (__mmask32)(U), \
53+
(const int)(R)))
54+
55+
#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
56+
((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \
57+
(__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_setzero_ph(), \
58+
(__mmask32)(U), (const int)(R)))
59+
60+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
61+
_mm512_cvtbiasph_pbf8(__m512i __A, __m512h __B) {
62+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
63+
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
64+
(__mmask32)-1);
65+
}
66+
67+
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_pbf8(
68+
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
69+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
70+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
71+
}
72+
73+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
74+
_mm512_maskz_cvtbiasph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) {
75+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
76+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
77+
(__mmask32)__U);
78+
}
79+
80+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
81+
_mm512_cvtbiassph_pbf8(__m512i __A, __m512h __B) {
82+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
83+
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
84+
(__mmask32)-1);
85+
}
86+
87+
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_pbf8(
88+
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
89+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
90+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
91+
}
92+
93+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
94+
_mm512_maskz_cvtbiassph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) {
95+
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
96+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
97+
(__mmask32)__U);
98+
}
99+
100+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
101+
_mm512_cvtbiasph_phf8(__m512i __A, __m512h __B) {
102+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
103+
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
104+
(__mmask32)-1);
105+
}
106+
107+
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_phf8(
108+
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
109+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
110+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
111+
}
112+
113+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
114+
_mm512_maskz_cvtbiasph_phf8(__mmask32 __U, __m512i __A, __m512h __B) {
115+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
116+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
117+
(__mmask32)__U);
118+
}
119+
120+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
121+
_mm512_cvtbiassph_phf8(__m512i __A, __m512h __B) {
122+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
123+
(__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
124+
(__mmask32)-1);
125+
}
126+
127+
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_phf8(
128+
__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
129+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
130+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
131+
}
132+
133+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
134+
_mm512_maskz_cvtbiassph_phf8(__mmask32 __U, __m512i __A, __m512h __B) {
135+
return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
136+
(__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
137+
(__mmask32)__U);
138+
}
139+
140+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
141+
_mm512_cvtne2ph_pbf8(__m512h __A, __m512h __B) {
142+
return (__m512i)__builtin_ia32_vcvtne2ph2bf8_512((__v32hf)(__A),
143+
(__v32hf)(__B));
144+
}
145+
146+
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_pbf8(
147+
__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
148+
return (__m512i)__builtin_ia32_selectb_512(
149+
(__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B), (__v64qi)__W);
150+
}
151+
152+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
153+
_mm512_maskz_cvtne2ph_pbf8(__mmask64 __U, __m512h __A, __m512h __B) {
154+
return (__m512i)__builtin_ia32_selectb_512(
155+
(__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B),
156+
(__v64qi)(__m512i)_mm512_setzero_si512());
157+
}
158+
159+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
160+
_mm512_cvtnes2ph_pbf8(__m512h __A, __m512h __B) {
161+
return (__m512i)__builtin_ia32_vcvtne2ph2bf8s_512((__v32hf)(__A),
162+
(__v32hf)(__B));
163+
}
164+
165+
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_pbf8(
166+
__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
167+
return (__m512i)__builtin_ia32_selectb_512(
168+
(__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B), (__v64qi)__W);
169+
}
170+
171+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
172+
_mm512_maskz_cvtnes2ph_pbf8(__mmask64 __U, __m512h __A, __m512h __B) {
173+
return (__m512i)__builtin_ia32_selectb_512(
174+
(__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B),
175+
(__v64qi)(__m512i)_mm512_setzero_si512());
176+
}
177+
178+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
179+
_mm512_cvtne2ph_phf8(__m512h __A, __m512h __B) {
180+
return (__m512i)__builtin_ia32_vcvtne2ph2hf8_512((__v32hf)(__A),
181+
(__v32hf)(__B));
182+
}
183+
184+
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_phf8(
185+
__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
186+
return (__m512i)__builtin_ia32_selectb_512(
187+
(__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B), (__v64qi)__W);
188+
}
189+
190+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
191+
_mm512_maskz_cvtne2ph_phf8(__mmask64 __U, __m512h __A, __m512h __B) {
192+
return (__m512i)__builtin_ia32_selectb_512(
193+
(__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B),
194+
(__v64qi)(__m512i)_mm512_setzero_si512());
195+
}
196+
197+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
198+
_mm512_cvtnes2ph_phf8(__m512h __A, __m512h __B) {
199+
return (__m512i)__builtin_ia32_vcvtne2ph2hf8s_512((__v32hf)(__A),
200+
(__v32hf)(__B));
201+
}
202+
203+
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_phf8(
204+
__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
205+
return (__m512i)__builtin_ia32_selectb_512(
206+
(__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_phf8(__A, __B), (__v64qi)__W);
207+
}
208+
209+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
210+
_mm512_maskz_cvtnes2ph_phf8(__mmask64 __U, __m512h __A, __m512h __B) {
211+
return (__m512i)__builtin_ia32_selectb_512(
212+
(__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_phf8(__A, __B),
213+
(__v64qi)(__m512i)_mm512_setzero_si512());
214+
}
215+
216+
static __inline__ __m512h __DEFAULT_FN_ATTRS512
217+
_mm512_cvtnehf8_ph(__m256i __A) {
218+
return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
219+
(__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1);
220+
}
221+
222+
static __inline__ __m512h __DEFAULT_FN_ATTRS512
223+
_mm512_mask_cvtnehf8_ph(__m512h __W, __mmask32 __U, __m256i __A) {
224+
return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
225+
(__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U);
226+
}
227+
228+
static __inline__ __m512h __DEFAULT_FN_ATTRS512
229+
_mm512_maskz_cvtnehf8_ph(__mmask32 __U, __m256i __A) {
230+
return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask(
231+
(__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U);
232+
}
233+
234+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
235+
_mm512_cvtneph_pbf8(__m512h __A) {
236+
return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
237+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
238+
}
239+
240+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
241+
_mm512_mask_cvtneph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) {
242+
return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
243+
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
244+
}
245+
246+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
247+
_mm512_maskz_cvtneph_pbf8(__mmask32 __U, __m512h __A) {
248+
return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
249+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
250+
}
251+
252+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
253+
_mm512_cvtnesph_pbf8(__m512h __A) {
254+
return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
255+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
256+
}
257+
258+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
259+
_mm512_mask_cvtnesph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) {
260+
return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
261+
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
262+
}
263+
264+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
265+
_mm512_maskz_cvtnesph_pbf8(__mmask32 __U, __m512h __A) {
266+
return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
267+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
268+
}
269+
270+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
271+
_mm512_cvtneph_phf8(__m512h __A) {
272+
return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
273+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
274+
}
275+
276+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
277+
_mm512_mask_cvtneph_phf8(__m256i __W, __mmask32 __U, __m512h __A) {
278+
return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
279+
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
280+
}
281+
282+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
283+
_mm512_maskz_cvtneph_phf8(__mmask32 __U, __m512h __A) {
284+
return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
285+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
286+
}
287+
288+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
289+
_mm512_cvtnesph_phf8(__m512h __A) {
290+
return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
291+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
292+
}
293+
294+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
295+
_mm512_mask_cvtnesph_phf8(__m256i __W, __mmask32 __U, __m512h __A) {
296+
return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
297+
(__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
298+
}
299+
300+
static __inline__ __m256i __DEFAULT_FN_ATTRS512
301+
_mm512_maskz_cvtnesph_phf8(__mmask32 __U, __m512h __A) {
302+
return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
303+
(__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
304+
}
305+
306+
static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
307+
return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8));
308+
}
309+
310+
static __inline __m512h __DEFAULT_FN_ATTRS512
311+
_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
312+
return _mm512_castsi512_ph(
313+
_mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
314+
}
315+
316+
static __inline __m512h __DEFAULT_FN_ATTRS512
317+
_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
318+
return _mm512_castsi512_ph(
319+
_mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
320+
}
321+
322+
#undef __DEFAULT_FN_ATTRS512
323+
324+
#endif // __AVX10_2_512CONVERTINTRIN_H
325+
#endif // __SSE2__

0 commit comments

Comments
 (0)