|
1 |
| -//! PowerPC Vectir Scalar eXtensions (VSX) intrinsics. |
| 1 | +//! PowerPC Vector Scalar eXtensions (VSX) intrinsics. |
2 | 2 | //!
|
3 | 3 | //! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA
|
4 | 4 | //! NVlink)] and [POWER ISA v3.0B (for POWER9)].
|
5 | 5 | //!
|
6 | 6 | //! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u
|
7 | 7 | //! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv
|
8 | 8 |
|
9 |
| -//#[cfg(test)] |
10 |
| -//use stdsimd_test::assert_instr; |
11 |
| -//use coresimd::simd_llvm::simd_add; |
12 |
| -//use coresimd::simd::*; |
| 9 | +#![allow(non_camel_case_types)] |
| 10 | + |
| 11 | +use coresimd::simd_llvm::*; |
| 12 | + |
| 13 | +#[cfg(test)] |
| 14 | +use stdsimd_test::assert_instr; |
| 15 | + |
| 16 | +use mem; |
| 17 | + |
| 18 | +types! { |
| 19 | + // (disabled) pub struct vector_Float16 = f16x8; |
| 20 | + /// PowerPC-specific 128-bit wide vector of two packed `i64` values |
| 21 | + pub struct vector_signed_long(i64, i64); |
| 22 | + /// PowerPC-specific 128-bit wide vector of two packed `u64` values |
| 23 | + pub struct vector_unsigned_long(u64, u64); |
| 24 | + /// PowerPC-specific 128-bit wide vector mask of two 64-bit elements (declared as two `i64` lanes) |
| 25 | + pub struct vector_bool_long(i64, i64); |
| 26 | + /// PowerPC-specific 128-bit wide vector of two packed `f64` values |
| 27 | + pub struct vector_double(f64, f64); |
| 28 | + // (disabled) pub struct vector_signed_long_long = vector_signed_long; |
| 29 | + // (disabled) pub struct vector_unsigned_long_long = vector_unsigned_long; |
| 30 | + // (disabled) pub struct vector_bool_long_long = vector_bool_long; |
| 31 | + // (disabled) pub struct vector_signed___int128 = i128x1; |
| 32 | + // (disabled) pub struct vector_unsigned___int128 = i128x1; NOTE(review): presumably meant an unsigned 128-bit type - confirm before enabling |
| 33 | +} |
| 34 | + |
| 35 | +mod sealed { |
| 36 | + use coresimd::simd::*; |
| 37 | + use super::*; |
| 38 | + |
| 39 | + pub trait VectorPermDI { |
| 40 | + unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self; |
| 41 | + } |
| 42 | + |
| 43 | + // xxpermdi has an big-endian bias and extended mnemonics |
| 44 | + #[inline] |
| 45 | + #[target_feature(enable = "vsx")] |
| 46 | + #[cfg_attr( |
| 47 | + all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0) |
| 48 | + )] |
| 49 | + #[cfg_attr( |
| 50 | + all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0) |
| 51 | + )] |
| 52 | + unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 { |
| 53 | + match dm & 0b11 { |
| 54 | + 0 => simd_shuffle2(a, b, [0b00, 0b10]), |
| 55 | + 1 => simd_shuffle2(a, b, [0b01, 0b10]), |
| 56 | + 2 => simd_shuffle2(a, b, [0b00, 0b11]), |
| 57 | + _ => simd_shuffle2(a, b, [0b01, 0b11]), |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + macro_rules! vec_xxpermdi { |
| 62 | + {$impl: ident} => { |
| 63 | + impl VectorPermDI for $impl { |
| 64 | + #[inline] |
| 65 | + #[target_feature(enable = "vsx")] |
| 66 | + unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self { |
| 67 | + mem::transmute(xxpermdi(mem::transmute(self), mem::transmute(b), dm)) |
| 68 | + } |
| 69 | + } |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + vec_xxpermdi! { vector_unsigned_long } |
| 74 | + vec_xxpermdi! { vector_signed_long } |
| 75 | + vec_xxpermdi! { vector_bool_long } |
| 76 | + vec_xxpermdi! { vector_double } |
| 77 | +} |
| 78 | + |
| 79 | +/// Vector permute. |
| 80 | +#[inline] |
| 81 | +#[target_feature(enable = "vsx")] |
| 82 | +#[rustc_args_required_const(2)] |
| 83 | +pub unsafe fn vec_xxpermdi<T>(a: T, b: T, dm: u8) -> T |
| 84 | +where |
| 85 | + T: sealed::VectorPermDI, |
| 86 | +{ |
| 87 | + a.vec_xxpermdi(b, dm) |
| 88 | +} |
| 89 | + |
#[cfg(test)]
mod tests {
    #[cfg(target_arch = "powerpc")]
    use coresimd::arch::powerpc::*;

    #[cfg(target_arch = "powerpc64")]
    use coresimd::arch::powerpc64::*;

    use coresimd::simd::*;
    use stdsimd_test::simd_test;

    // Generates one test that checks all four `dm` selectors of
    // `vec_xxpermdi` for a given vector type.
    //
    // Arguments: test name, portable SIMD type used to build and compare
    // values, PowerPC vector type under test, then the two lanes of the
    // first operand followed by the two lanes of the second operand.
    macro_rules! test_vec_xxpermdi {
        {$name:ident, $simd:ident, $vec:ident, [$($a0:expr),+], [$($a1:expr),+], [$($b0:expr),+], [$($b1:expr),+]} => {
            #[simd_test(enable = "vsx")]
            unsafe fn $name() {
                let lhs: $vec = ::mem::transmute($simd::new($($a0),+, $($a1),+));
                let rhs: $vec = ::mem::transmute($simd::new($($b0),+, $($b1),+));

                // dm = 0: first lane of each operand.
                let picked: $simd = ::mem::transmute(vec_xxpermdi(lhs, rhs, 0));
                assert_eq!($simd::new($($a0),+, $($b0),+), picked);

                // dm = 1: second lane of `lhs`, first lane of `rhs`.
                let picked: $simd = ::mem::transmute(vec_xxpermdi(lhs, rhs, 1));
                assert_eq!($simd::new($($a1),+, $($b0),+), picked);

                // dm = 2: first lane of `lhs`, second lane of `rhs`.
                let picked: $simd = ::mem::transmute(vec_xxpermdi(lhs, rhs, 2));
                assert_eq!($simd::new($($a0),+, $($b1),+), picked);

                // dm = 3: second lane of each operand.
                let picked: $simd = ::mem::transmute(vec_xxpermdi(lhs, rhs, 3));
                assert_eq!($simd::new($($a1),+, $($b1),+), picked);
            }
        }
    }

    test_vec_xxpermdi!{test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]}
    test_vec_xxpermdi!{test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]}
    test_vec_xxpermdi!{test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]}
    test_vec_xxpermdi!{test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]}
}
0 commit comments