@@ -113,44 +113,21 @@ pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
113
113
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd)
114
114
// Review note: this hunk converts `_mm256_shuffle_pd` from a runtime `imm8`
// argument (dispatched through nested `match` macros so that every shuffle
// index is a literal) to a const generic `MASK`, from which the four shuffle
// indices are computed directly. Judging by the test for this function,
// indices 0..=3 address lanes of `a` and 4..=7 address lanes of `b`.
#[ inline]
115
115
#[ target_feature( enable = "avx" ) ]
116
- #[ cfg_attr( test, assert_instr( vshufpd, imm8 = 0x1 ) ) ]
117
- #[ rustc_args_required_const( 2 ) ]
118
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
119
- pub unsafe fn _mm256_shuffle_pd ( a : __m256d , b : __m256d , imm8 : i32 ) -> __m256d {
120
- let imm8 = ( imm8 & 0xFF ) as u8 ; // old path: keep only the low 8 bits of the runtime argument
121
- macro_rules! shuffle4 {
122
- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
123
- simd_shuffle4( a, b, [ $a, $b, $c, $d] )
124
- } ;
125
- }
126
- macro_rules! shuffle3 {
127
- ( $a: expr, $b: expr, $c: expr) => {
128
- match ( imm8 >> 3 ) & 0x1 { // bit 3 chooses index 6 or 7 for lane 3
129
- 0 => shuffle4!( $a, $b, $c, 6 ) ,
130
- _ => shuffle4!( $a, $b, $c, 7 ) ,
131
- }
132
- } ;
133
- }
134
- macro_rules! shuffle2 {
135
- ( $a: expr, $b: expr) => {
136
- match ( imm8 >> 2 ) & 0x1 { // bit 2 chooses index 2 or 3 for lane 2
137
- 0 => shuffle3!( $a, $b, 2 ) ,
138
- _ => shuffle3!( $a, $b, 3 ) ,
139
- }
140
- } ;
141
- }
142
- macro_rules! shuffle1 {
143
- ( $a: expr) => {
144
- match ( imm8 >> 1 ) & 0x1 { // bit 1 chooses index 4 or 5 for lane 1
145
- 0 => shuffle2!( $a, 4 ) ,
146
- _ => shuffle2!( $a, 5 ) ,
147
- }
148
- } ;
149
- }
150
- match imm8 & 0x1 { // bit 0 chooses index 0 or 1 for lane 0
151
- 0 => shuffle1 ! ( 0 ) ,
152
- _ => shuffle1 ! ( 1 ) ,
153
- }
116
+ #[ cfg_attr( test, assert_instr( vshufpd, MASK = 3 ) ) ]
117
+ #[ rustc_legacy_const_generics( 2 ) ]
118
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
119
+ pub unsafe fn _mm256_shuffle_pd < const MASK : i32 > ( a : __m256d , b : __m256d ) -> __m256d {
120
+ static_assert_imm8 ! ( MASK ) ; // NOTE(review): presumably a compile-time check that MASK fits an 8-bit immediate; confirm against the macro definition
121
+ simd_shuffle4 (
122
+ a,
123
+ b,
124
+ [
125
+ MASK as u32 & 0b1 , // lane 0: bit 0 gives index 0 or 1
126
+ ( ( MASK as u32 >> 1 ) & 0b1 ) + 4 , // lane 1: bit 1 gives index 4 or 5
127
+ ( ( MASK as u32 >> 2 ) & 0b1 ) + 2 , // lane 2: bit 2 gives index 2 or 3
128
+ ( ( MASK as u32 >> 3 ) & 0b1 ) + 6 , // lane 3: bit 3 gives index 6 or 7
129
+ ] ,
130
+ )
154
131
}
155
132
156
133
/// Shuffles single-precision (32-bit) floating-point elements in `a` within
@@ -159,61 +136,25 @@ pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
159
136
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps)
160
137
// Review note: this hunk converts `_mm256_shuffle_ps` from a runtime `imm8`
// argument (dispatched through nested four-way `match` macros so that every
// shuffle index is a literal) to a const generic `MASK`. The mask is read as
// four 2-bit fields; each field is used twice, once for lanes 0..=3 and once
// (offset by 4) for lanes 4..=7. Judging by the test for this function,
// indices 0..=7 address lanes of `a` and 8..=15 address lanes of `b`.
#[ inline]
161
138
#[ target_feature( enable = "avx" ) ]
162
- #[ cfg_attr( test, assert_instr( vshufps, imm8 = 0x0 ) ) ]
163
- #[ rustc_args_required_const( 2 ) ]
164
- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
165
- pub unsafe fn _mm256_shuffle_ps ( a : __m256 , b : __m256 , imm8 : i32 ) -> __m256 {
166
- let imm8 = ( imm8 & 0xFF ) as u8 ; // old path: keep only the low 8 bits of the runtime argument
167
- macro_rules! shuffle4 {
168
- (
169
- $a: expr,
170
- $b: expr,
171
- $c: expr,
172
- $d: expr,
173
- $e: expr,
174
- $f: expr,
175
- $g: expr,
176
- $h: expr
177
- ) => {
178
- simd_shuffle8( a, b, [ $a, $b, $c, $d, $e, $f, $g, $h] )
179
- } ;
180
- }
181
- macro_rules! shuffle3 {
182
- ( $a: expr, $b: expr, $c: expr, $e: expr, $f: expr, $g: expr) => {
183
- match ( imm8 >> 6 ) & 0x3 { // bits 7:6 choose the indices for lanes 3 and 7
184
- 0 => shuffle4!( $a, $b, $c, 8 , $e, $f, $g, 12 ) ,
185
- 1 => shuffle4!( $a, $b, $c, 9 , $e, $f, $g, 13 ) ,
186
- 2 => shuffle4!( $a, $b, $c, 10 , $e, $f, $g, 14 ) ,
187
- _ => shuffle4!( $a, $b, $c, 11 , $e, $f, $g, 15 ) ,
188
- }
189
- } ;
190
- }
191
- macro_rules! shuffle2 {
192
- ( $a: expr, $b: expr, $e: expr, $f: expr) => {
193
- match ( imm8 >> 4 ) & 0x3 { // bits 5:4 choose the indices for lanes 2 and 6
194
- 0 => shuffle3!( $a, $b, 8 , $e, $f, 12 ) ,
195
- 1 => shuffle3!( $a, $b, 9 , $e, $f, 13 ) ,
196
- 2 => shuffle3!( $a, $b, 10 , $e, $f, 14 ) ,
197
- _ => shuffle3!( $a, $b, 11 , $e, $f, 15 ) ,
198
- }
199
- } ;
200
- }
201
- macro_rules! shuffle1 {
202
- ( $a: expr, $e: expr) => {
203
- match ( imm8 >> 2 ) & 0x3 { // bits 3:2 choose the indices for lanes 1 and 5
204
- 0 => shuffle2!( $a, 0 , $e, 4 ) ,
205
- 1 => shuffle2!( $a, 1 , $e, 5 ) ,
206
- 2 => shuffle2!( $a, 2 , $e, 6 ) ,
207
- _ => shuffle2!( $a, 3 , $e, 7 ) ,
208
- }
209
- } ;
210
- }
211
- match imm8 & 0x3 { // bits 1:0 choose the indices for lanes 0 and 4
212
- 0 => shuffle1 ! ( 0 , 4 ) ,
213
- 1 => shuffle1 ! ( 1 , 5 ) ,
214
- 2 => shuffle1 ! ( 2 , 6 ) ,
215
- _ => shuffle1 ! ( 3 , 7 ) ,
216
- }
139
+ #[ cfg_attr( test, assert_instr( vshufps, MASK = 3 ) ) ]
140
+ #[ rustc_legacy_const_generics( 2 ) ]
141
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
142
+ pub unsafe fn _mm256_shuffle_ps < const MASK : i32 > ( a : __m256 , b : __m256 ) -> __m256 {
143
+ static_assert_imm8 ! ( MASK ) ; // NOTE(review): presumably a compile-time check that MASK fits an 8-bit immediate; confirm against the macro definition
144
+ simd_shuffle8 (
145
+ a,
146
+ b,
147
+ [
148
+ MASK as u32 & 0b11 , // lane 0: bits 1:0 give an index in 0..=3
149
+ ( MASK as u32 >> 2 ) & 0b11 , // lane 1: bits 3:2 give an index in 0..=3
150
+ ( ( MASK as u32 >> 4 ) & 0b11 ) + 8 , // lane 2: bits 5:4 give an index in 8..=11
151
+ ( ( MASK as u32 >> 6 ) & 0b11 ) + 8 , // lane 3: bits 7:6 give an index in 8..=11
152
+ ( MASK as u32 & 0b11 ) + 4 , // lane 4: bits 1:0 again, index in 4..=7
153
+ ( ( MASK as u32 >> 2 ) & 0b11 ) + 4 , // lane 5: bits 3:2 again, index in 4..=7
154
+ ( ( MASK as u32 >> 4 ) & 0b11 ) + 12 , // lane 6: bits 5:4 again, index in 12..=15
155
+ ( ( MASK as u32 >> 6 ) & 0b11 ) + 12 , // lane 7: bits 7:6 again, index in 12..=15
156
+ ] ,
157
+ )
217
158
}
218
159
219
160
/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
@@ -3381,7 +3322,7 @@ mod tests {
3381
3322
// Checks `_mm256_shuffle_pd` with every selector bit set.
unsafe fn test_mm256_shuffle_pd ( ) {
3382
3323
let a = _mm256_setr_pd ( 1. , 4. , 5. , 8. ) ;
3383
3324
let b = _mm256_setr_pd ( 2. , 3. , 6. , 7. ) ;
3384
- let r = _mm256_shuffle_pd ( a, b, 0xF ) ;
3325
+ let r = _mm256_shuffle_pd :: < 0b11_11_11_11 > ( a, b) ; // new literal is 0xFF, but the implementation in this diff reads only bits 0..=3, so it selects the same lanes as the old 0xF
3385
3326
// Assuming `_mm256_setr_pd` stores its arguments in lane order, this is
// the second element of each pair: 4 and 8 from `a`, 3 and 7 from `b`.
let e = _mm256_setr_pd ( 4. , 3. , 8. , 7. ) ;
3386
3327
assert_eq_m256d ( r, e) ;
3387
3328
}
@@ -3390,7 +3331,7 @@ mod tests {
3390
3331
// Checks `_mm256_shuffle_ps` with the two low 2-bit fields set to 3 and the
// two high fields set to 0.
unsafe fn test_mm256_shuffle_ps ( ) {
3391
3332
let a = _mm256_setr_ps ( 1. , 4. , 5. , 8. , 9. , 12. , 13. , 16. ) ;
3392
3333
let b = _mm256_setr_ps ( 2. , 3. , 6. , 7. , 10. , 11. , 14. , 15. ) ;
3393
- let r = _mm256_shuffle_ps ( a, b, 0x0F ) ;
3334
+ let r = _mm256_shuffle_ps :: < 0b00_00_11_11 > ( a, b) ; // same value as the old 0x0F, written as four 2-bit fields (high to low: 0, 0, 3, 3)
3394
3335
// Assuming `_mm256_setr_ps` stores its arguments in lane order, the
// computed indices are 3, 3, 8, 8, 7, 7, 12, 12, i.e.
// a[3], a[3], b[0], b[0], a[7], a[7], b[4], b[4].
let e = _mm256_setr_ps ( 8. , 8. , 2. , 2. , 16. , 16. , 10. , 10. ) ;
3395
3336
assert_eq_m256 ( r, e) ;
3396
3337
}
0 commit comments