Skip to content

Commit b0425e6

Browse files
authored
Convert shuffle_ps and shuffle_pd to const generics (rust-lang#1037)
1 parent 0d4c2f8 commit b0425e6

File tree

5 files changed, with 676 additions and 1139 deletions.

5 files changed

+676
-1139
lines changed

crates/core_arch/src/x86/avx.rs

+36 -95 (36 additions, 95 deletions)
Original file line number | Diff line number | Diff line change
@@ -113,44 +113,21 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
113113
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd)
114114
#[inline]
115115
#[target_feature(enable = "avx")]
116-
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))]
117-
#[rustc_args_required_const(2)]
118-
#[stable(feature = "simd_x86", since = "1.27.0")]
119-
pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
120-
let imm8 = (imm8 & 0xFF) as u8;
121-
macro_rules! shuffle4 {
122-
($a:expr, $b:expr, $c:expr, $d:expr) => {
123-
simd_shuffle4(a, b, [$a, $b, $c, $d])
124-
};
125-
}
126-
macro_rules! shuffle3 {
127-
($a:expr, $b:expr, $c:expr) => {
128-
match (imm8 >> 3) & 0x1 {
129-
0 => shuffle4!($a, $b, $c, 6),
130-
_ => shuffle4!($a, $b, $c, 7),
131-
}
132-
};
133-
}
134-
macro_rules! shuffle2 {
135-
($a:expr, $b:expr) => {
136-
match (imm8 >> 2) & 0x1 {
137-
0 => shuffle3!($a, $b, 2),
138-
_ => shuffle3!($a, $b, 3),
139-
}
140-
};
141-
}
142-
macro_rules! shuffle1 {
143-
($a:expr) => {
144-
match (imm8 >> 1) & 0x1 {
145-
0 => shuffle2!($a, 4),
146-
_ => shuffle2!($a, 5),
147-
}
148-
};
149-
}
150-
match imm8 & 0x1 {
151-
0 => shuffle1!(0),
152-
_ => shuffle1!(1),
153-
}
116+
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
117+
#[rustc_legacy_const_generics(2)]
118+
#[stable(feature = "simd_x86", since = "1.27.0")]
119+
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
120+
static_assert_imm8!(MASK);
121+
simd_shuffle4(
122+
a,
123+
b,
124+
[
125+
MASK as u32 & 0b1,
126+
((MASK as u32 >> 1) & 0b1) + 4,
127+
((MASK as u32 >> 2) & 0b1) + 2,
128+
((MASK as u32 >> 3) & 0b1) + 6,
129+
],
130+
)
154131
}
155132

156133
/// Shuffles single-precision (32-bit) floating-point elements in `a` within
@@ -159,61 +136,25 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
159136
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps)
160137
#[inline]
161138
#[target_feature(enable = "avx")]
162-
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))]
163-
#[rustc_args_required_const(2)]
164-
#[stable(feature = "simd_x86", since = "1.27.0")]
165-
pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 {
166-
let imm8 = (imm8 & 0xFF) as u8;
167-
macro_rules! shuffle4 {
168-
(
169-
$a:expr,
170-
$b:expr,
171-
$c:expr,
172-
$d:expr,
173-
$e:expr,
174-
$f:expr,
175-
$g:expr,
176-
$h:expr
177-
) => {
178-
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
179-
};
180-
}
181-
macro_rules! shuffle3 {
182-
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
183-
match (imm8 >> 6) & 0x3 {
184-
0 => shuffle4!($a, $b, $c, 8, $e, $f, $g, 12),
185-
1 => shuffle4!($a, $b, $c, 9, $e, $f, $g, 13),
186-
2 => shuffle4!($a, $b, $c, 10, $e, $f, $g, 14),
187-
_ => shuffle4!($a, $b, $c, 11, $e, $f, $g, 15),
188-
}
189-
};
190-
}
191-
macro_rules! shuffle2 {
192-
($a:expr, $b:expr, $e:expr, $f:expr) => {
193-
match (imm8 >> 4) & 0x3 {
194-
0 => shuffle3!($a, $b, 8, $e, $f, 12),
195-
1 => shuffle3!($a, $b, 9, $e, $f, 13),
196-
2 => shuffle3!($a, $b, 10, $e, $f, 14),
197-
_ => shuffle3!($a, $b, 11, $e, $f, 15),
198-
}
199-
};
200-
}
201-
macro_rules! shuffle1 {
202-
($a:expr, $e:expr) => {
203-
match (imm8 >> 2) & 0x3 {
204-
0 => shuffle2!($a, 0, $e, 4),
205-
1 => shuffle2!($a, 1, $e, 5),
206-
2 => shuffle2!($a, 2, $e, 6),
207-
_ => shuffle2!($a, 3, $e, 7),
208-
}
209-
};
210-
}
211-
match imm8 & 0x3 {
212-
0 => shuffle1!(0, 4),
213-
1 => shuffle1!(1, 5),
214-
2 => shuffle1!(2, 6),
215-
_ => shuffle1!(3, 7),
216-
}
139+
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
140+
#[rustc_legacy_const_generics(2)]
141+
#[stable(feature = "simd_x86", since = "1.27.0")]
142+
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
143+
static_assert_imm8!(MASK);
144+
simd_shuffle8(
145+
a,
146+
b,
147+
[
148+
MASK as u32 & 0b11,
149+
(MASK as u32 >> 2) & 0b11,
150+
((MASK as u32 >> 4) & 0b11) + 8,
151+
((MASK as u32 >> 6) & 0b11) + 8,
152+
(MASK as u32 & 0b11) + 4,
153+
((MASK as u32 >> 2) & 0b11) + 4,
154+
((MASK as u32 >> 4) & 0b11) + 12,
155+
((MASK as u32 >> 6) & 0b11) + 12,
156+
],
157+
)
217158
}
218159

219160
/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
@@ -3381,7 +3322,7 @@ mod tests {
33813322
unsafe fn test_mm256_shuffle_pd() {
33823323
let a = _mm256_setr_pd(1., 4., 5., 8.);
33833324
let b = _mm256_setr_pd(2., 3., 6., 7.);
3384-
let r = _mm256_shuffle_pd(a, b, 0xF);
3325+
let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
33853326
let e = _mm256_setr_pd(4., 3., 8., 7.);
33863327
assert_eq_m256d(r, e);
33873328
}
@@ -3390,7 +3331,7 @@ mod tests {
33903331
unsafe fn test_mm256_shuffle_ps() {
33913332
let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
33923333
let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3393-
let r = _mm256_shuffle_ps(a, b, 0x0F);
3334+
let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
33943335
let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
33953336
assert_eq_m256(r, e);
33963337
}

0 commit comments

Comments
 (0)